From 9049e4e2be5a66e23135aa69744d2b00ac8fd2b3 Mon Sep 17 00:00:00 2001
From: Roman Isecke <136338424+rbiseck3@users.noreply.github.com>
Date: Tue, 15 Oct 2024 11:01:34 -0400
Subject: [PATCH] feat/remove ingest code, use new dep for tests (#3595)

### Description
An alternative to https://github.com/Unstructured-IO/unstructured/pull/3572
that keeps all ingest tests and runs them by pulling in the latest published
version of unstructured-ingest.

---------

Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com>
Co-authored-by: rbiseck3
Co-authored-by: Christine Straub
Co-authored-by: christinestraub
---
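For reviewers, a minimal sketch of the local test flow this change implies,
assuming the `install-ingest` Make target installs from the new
`requirements/ingest/ingest.txt` and that that file pins the published
`unstructured-ingest` package (both are assumptions; neither is spelled out
in this diff):

```sh
# Assumed one-step install of the external ingest dependency; the
# per-connector requirements/ingest/*.in and *.txt files are removed.
make install-ingest

# The source ingest tests are invoked the same way as before; the connectors
# they exercise are now imported from the unstructured-ingest package rather
# than from unstructured.ingest.
./test_unstructured_ingest/test-ingest-src.sh
```
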
 .github/actions/base-ingest-cache/action.yml | 2 +-
 .github/workflows/ci.yml | 131 --
 .../ingest-test-fixtures-update-pr.yml | 1 +
 CHANGELOG.md | 7 +-
 MANIFEST.in | 42 -
 Makefile | 176 +-
 docs/requirements.in | 2 +-
 docs/requirements.txt | 16 +-
 requirements/Makefile | 19 +-
 requirements/base.txt | 15 +-
 requirements/cache.txt | 1 -
 requirements/dev.txt | 6 +-
 requirements/extra-csv.txt | 4 +-
 requirements/extra-paddleocr.txt | 8 +-
 requirements/extra-pdf-image.txt | 20 +-
 requirements/extra-xlsx.txt | 4 +-
 requirements/huggingface.txt | 2 +-
 requirements/ingest/airtable.in | 3 -
 requirements/ingest/airtable.txt | 44 -
 requirements/ingest/astradb.in | 3 -
 requirements/ingest/astradb.txt | 100 --
 requirements/ingest/azure-cognitive-search.in | 3 -
 .../ingest/azure-cognitive-search.txt | 45 -
 requirements/ingest/azure.in | 4 -
 requirements/ingest/azure.txt | 108 --
 requirements/ingest/biomed.in | 3 -
 requirements/ingest/biomed.txt | 16 -
 requirements/ingest/box.in | 4 -
 requirements/ingest/box.txt | 65 -
 requirements/ingest/chroma.in | 10 -
 requirements/ingest/chroma.txt | 256 ---
 requirements/ingest/clarifai.in | 3 -
 requirements/ingest/clarifai.txt | 83 -
 requirements/ingest/confluence.in | 3 -
 requirements/ingest/confluence.txt | 56 -
 requirements/ingest/databricks-volumes.in | 3 -
 requirements/ingest/databricks-volumes.txt | 41 -
 requirements/ingest/delta-table.in | 4 -
 requirements/ingest/delta-table.txt | 16 -
 requirements/ingest/discord.in | 3 -
 requirements/ingest/discord.txt | 36 -
 requirements/ingest/dropbox.in | 4 -
 requirements/ingest/dropbox.txt | 45 -
 requirements/ingest/elasticsearch.in | 3 -
 requirements/ingest/elasticsearch.txt | 47 -
 requirements/ingest/embed-aws-bedrock.in | 4 -
 requirements/ingest/embed-aws-bedrock.txt | 191 ---
 requirements/ingest/embed-huggingface.in | 4 -
 requirements/ingest/embed-huggingface.txt | 170 --
 requirements/ingest/embed-mixedbreadai.in | 3 -
 requirements/ingest/embed-mixedbreadai.txt | 56 -
 requirements/ingest/embed-octoai.in | 4 -
 requirements/ingest/embed-octoai.txt | 87 -
 requirements/ingest/embed-openai.in | 4 -
 requirements/ingest/embed-openai.txt | 113 --
 requirements/ingest/embed-vertexai.in | 5 -
 requirements/ingest/embed-vertexai.txt | 275 ----
 requirements/ingest/embed-voyageai.in | 4 -
 requirements/ingest/embed-voyageai.txt | 147 --
 requirements/ingest/gcs.in | 5 -
 requirements/ingest/gcs.txt | 120 --
 requirements/ingest/github.in | 4 -
 requirements/ingest/github.txt | 57 -
 requirements/ingest/gitlab.in | 3 -
 requirements/ingest/gitlab.txt | 34 -
 requirements/ingest/google-drive.in | 3 -
 requirements/ingest/google-drive.txt | 66 -
 requirements/ingest/hubspot.in | 4 -
 requirements/ingest/hubspot.txt | 27 -
 requirements/ingest/ingest.txt | 5 +
 requirements/ingest/jira.in | 3 -
 requirements/ingest/jira.txt | 56 -
 requirements/ingest/kafka.in | 3 -
 requirements/ingest/kafka.txt | 8 -
 requirements/ingest/mongodb.in | 3 -
 requirements/ingest/mongodb.txt | 10 -
 requirements/ingest/notion.in | 4 -
 requirements/ingest/notion.txt | 49 -
 requirements/ingest/onedrive.in | 5 -
 requirements/ingest/onedrive.txt | 65 -
 requirements/ingest/opensearch.in | 3 -
 requirements/ingest/opensearch.txt | 41 -
 requirements/ingest/outlook.in | 4 -
 requirements/ingest/outlook.txt | 55 -
 requirements/ingest/pinecone.in | 3 -
 requirements/ingest/pinecone.txt | 31 -
 requirements/ingest/postgres.in | 3 -
 requirements/ingest/postgres.txt | 8 -
 requirements/ingest/qdrant.in | 3 -
 requirements/ingest/qdrant.txt | 86 -
 requirements/ingest/reddit.in | 3 -
 requirements/ingest/reddit.txt | 36 -
 requirements/ingest/s3.in | 4 -
 requirements/ingest/s3.txt | 70 -
 requirements/ingest/salesforce.in | 3 -
 requirements/ingest/salesforce.txt | 76 -
 requirements/ingest/sftp.in | 4 -
 requirements/ingest/sftp.txt | 27 -
 requirements/ingest/sharepoint.in | 4 -
 requirements/ingest/sharepoint.txt | 55 -
 requirements/ingest/singlestore.in | 3 -
 requirements/ingest/singlestore.txt | 62 -
 requirements/ingest/slack.in | 3 -
 requirements/ingest/slack.txt | 8 -
 requirements/ingest/weaviate.in | 3 -
 requirements/ingest/weaviate.txt | 45 -
 requirements/ingest/wikipedia.in | 3 -
 requirements/ingest/wikipedia.txt | 37 -
 requirements/test.txt | 14 +-
 .../create_scale_test_components.py | 2 +-
 scripts/consistent-deps.sh | 4 +-
 setup.py | 48 -
 test_unstructured/embed/test_mixedbreadai.py | 4 +-
 test_unstructured/embed/test_octoai.py | 4 +-
 test_unstructured/embed/test_openai.py | 4 +-
 test_unstructured/embed/test_vertexai.py | 2 +-
 test_unstructured/embed/test_voyageai.py | 4 +-
 .../ingest/utils/test_compression.py | 15 -
 test_unstructured/test_utils.py | 27 -
 test_unstructured_ingest/dest/astradb.sh | 64 -
 .../dest/azure-cognitive-search.sh | 121 --
 test_unstructured_ingest/dest/azure.sh | 58 -
 test_unstructured_ingest/dest/box.sh | 55 -
 test_unstructured_ingest/dest/chroma.sh | 61 -
 test_unstructured_ingest/dest/clarifai.sh | 114 --
 .../dest/databricks-volumes.sh | 63 -
 test_unstructured_ingest/dest/delta-table.sh | 46 -
 test_unstructured_ingest/dest/dropbox.sh | 80 -
 .../dest/elasticsearch.sh | 67 -
 test_unstructured_ingest/dest/gcs.sh | 61 -
 test_unstructured_ingest/dest/kafka-local.sh | 66 -
 test_unstructured_ingest/dest/mongodb.sh | 77 -
 test_unstructured_ingest/dest/opensearch.sh | 56 -
 test_unstructured_ingest/dest/pgvector.sh | 54 -
 test_unstructured_ingest/dest/pinecone.sh | 134 --
 test_unstructured_ingest/dest/qdrant.sh | 87 -
 test_unstructured_ingest/dest/s3.sh | 55 -
 .../dest/sharepoint-embed-cog-index.sh | 135 --
 test_unstructured_ingest/dest/singlestore.sh | 65 -
 test_unstructured_ingest/dest/sqlite.sh | 52 -
 test_unstructured_ingest/dest/vectara.sh | 94 --
 test_unstructured_ingest/dest/weaviate.sh | 51 -
 ...iomedical-Data-Scientists-2-pages.pdf.json | 52 +-
 .../azure/IRS-form-1987.pdf.json | 160 +-
 .../azure/IRS-form-1987.png.json | 82 +-
 .../azure/rfc854.txt.json | 280 ++--
 .../azure/spring-weather.html.json | 184 +--
 .../box/handbook-1p.docx.json | 30 +-
 .../box/nested-1/ideas-page.html.json | 2 +-
 .../nested-1/nested-2/ideas-page.html.json | 2 +-
 .../box/science-exploration-1p.pptx.json | 26 +-
 .../dropbox/handbook-1p.docx.json | 120 +-
 .../dropbox/nested-1/ideas-page.html.json | 8 +-
 .../dropbox/nested-2/ideas-page.html.json | 8 +-
 .../dropbox/science-exploration-1p.pptx.json | 104 +-
 .../gcs/ideas-page.html.json | 2 +-
 .../gcs/nested-1/fake-text.txt.json | 12 +-
 .../gcs/nested-1/nested/ideas-page.html.json | 2 +-
 .../gcs/nested-2/fake-text.txt.json | 12 +-
 .../gcs/nested-2/nested/ideas-page.html.json | 2 +-
 .../gcs/nested-2/stanley-cups.xlsx.json | 8 +-
 test_unstructured_ingest/src/against-api.sh | 2 +-
 test_unstructured_ingest/src/airtable-diff.sh | 3 +-
 .../src/airtable-large.sh | 2 +-
 test_unstructured_ingest/src/astradb.sh | 3 +-
 test_unstructured_ingest/src/azure.sh | 4 +-
 test_unstructured_ingest/src/biomed-api.sh | 2 +-
 test_unstructured_ingest/src/biomed-path.sh | 2 +-
 test_unstructured_ingest/src/box.sh | 4 +-
 .../src/confluence-diff.sh | 2 +-
 .../src/confluence-large.sh | 2 +-
 test_unstructured_ingest/src/delta-table.sh | 2 +-
 test_unstructured_ingest/src/discord.sh | 2 +-
 test_unstructured_ingest/src/dropbox.sh | 4 +-
 test_unstructured_ingest/src/elasticsearch.sh | 4 +-
 test_unstructured_ingest/src/gcs.sh | 4 +-
 test_unstructured_ingest/src/github.sh | 2 +-
 test_unstructured_ingest/src/gitlab.sh | 2 +-
 test_unstructured_ingest/src/google-drive.sh | 6 +-
 test_unstructured_ingest/src/hubspot.sh | 3 +-
 test_unstructured_ingest/src/jira.sh | 2 +-
 test_unstructured_ingest/src/kafka-local.sh | 2 +-
 .../src/local-embed-bedrock.sh | 6 +-
 .../src/local-embed-mixedbreadai.sh | 4 +-
 .../src/local-embed-octoai.sh | 2 +-
 .../src/local-embed-vertexai.sh | 6 +-
 .../src/local-embed-voyageai.sh | 4 +-
 test_unstructured_ingest/src/local-embed.sh | 6 +-
 .../src/local-failed-partition.sh | 2 +-
 .../src/local-single-file-basic-chunking.sh | 3 +-
 ...ocal-single-file-chunk-no-orig-elements.sh | 7 +-
 .../src/local-single-file-with-encoding.sh | 4 +-
 ...gle-file-with-pdf-infer-table-structure.sh | 4 +-
 .../src/local-single-file.sh | 4 +-
 test_unstructured_ingest/src/local.sh | 2 +-
 test_unstructured_ingest/src/mongodb.sh | 5 +-
 test_unstructured_ingest/src/notion.sh | 2 +-
 test_unstructured_ingest/src/onedrive.sh | 4 +-
 test_unstructured_ingest/src/opensearch.sh | 4 +-
 test_unstructured_ingest/src/outlook.sh | 2 +-
 .../src/pdf-fast-reprocess.sh | 4 +-
 .../src/s3-compression.sh | 2 +-
 test_unstructured_ingest/src/s3-minio.sh | 4 +-
 test_unstructured_ingest/src/s3.sh | 4 +-
 test_unstructured_ingest/src/salesforce.sh | 6 +-
 test_unstructured_ingest/src/sftp.sh | 4 +-
 .../src/sharepoint-with-permissions.sh | 2 +-
 test_unstructured_ingest/src/sharepoint.sh | 2 +-
 test_unstructured_ingest/src/slack.sh | 2 +-
 test_unstructured_ingest/src/wikipedia.sh | 2 +-
 test_unstructured_ingest/test-help.sh | 14 -
 test_unstructured_ingest/test-ingest-src.sh | 3 +-
 test_unstructured_ingest/unit/cli/test_cli.py | 18 -
 .../connector/fsspec/test_connector_gcs.py | 35 -
 .../unit/connector/fsspec/test_fsspec.py | 25 -
 .../unit/connector/fsspec/test_paths.py | 223 ---
 .../unit/connector/test_connector_git.py | 61 -
 .../connector/test_salesforce_connector.py | 57 -
 .../unit/connector/test_serialization.py | 46 -
 .../unit/connector/test_sharepoint.py | 59 -
 .../unit/connector/test_sql_conform_dict.py | 169 --
 .../test_enhanced_dataclass.py | 60 -
 .../unit/pipeline/reformat/test_chunking.py | 156 --
 test_unstructured_ingest/unit/test_error.py | 27 -
 .../unit/test_interfaces.py | 281 ----
 test_unstructured_ingest/unit/test_logger.py | 78 -
 test_unstructured_ingest/unit/test_utils.py | 164 --
 unstructured/__version__.py | 2 +-
 unstructured/embed/bedrock.py | 74 +-
 unstructured/embed/huggingface.py | 67 +-
 unstructured/embed/interfaces.py | 8 +-
 unstructured/embed/mixedbreadai.py | 69 +-
 unstructured/embed/octoai.py | 55 +-
 unstructured/embed/openai.py | 62 +-
 unstructured/embed/vertexai.py | 77 +-
 unstructured/embed/voyageai.py | 70 +-
 unstructured/ingest/README.md | 6 -
 unstructured/ingest/__init__.py | 10 -
 unstructured/ingest/cli/__init__.py | 14 -
 unstructured/ingest/cli/base/__init__.py | 0
 unstructured/ingest/cli/base/cmd.py | 19 -
 unstructured/ingest/cli/base/dest.py | 87 -
 unstructured/ingest/cli/base/src.py | 57 -
 unstructured/ingest/cli/cli.py | 32 -
 unstructured/ingest/cli/cmd_factory.py | 12 -
 unstructured/ingest/cli/cmds/__init__.py | 145 --
 unstructured/ingest/cli/cmds/airtable.py | 69 -
 unstructured/ingest/cli/cmds/astradb.py | 99 --
 .../ingest/cli/cmds/azure_cognitive_search.py | 65 -
 unstructured/ingest/cli/cmds/biomed.py | 52 -
 unstructured/ingest/cli/cmds/chroma.py | 104 --
 unstructured/ingest/cli/cmds/clarifai.py | 71 -
 unstructured/ingest/cli/cmds/confluence.py | 69 -
 .../ingest/cli/cmds/databricks_volumes.py | 163 --
 unstructured/ingest/cli/cmds/delta_table.py | 94 --
 unstructured/ingest/cli/cmds/discord.py | 47 -
 unstructured/ingest/cli/cmds/elasticsearch.py | 133 --
 .../ingest/cli/cmds/fsspec/__init__.py | 0
 unstructured/ingest/cli/cmds/fsspec/azure.py | 94 --
 unstructured/ingest/cli/cmds/fsspec/box.py | 48 -
 .../ingest/cli/cmds/fsspec/dropbox.py | 51 -
 unstructured/ingest/cli/cmds/fsspec/fsspec.py | 15 -
 unstructured/ingest/cli/cmds/fsspec/gcs.py | 71 -
 unstructured/ingest/cli/cmds/fsspec/s3.py | 74 -
 unstructured/ingest/cli/cmds/fsspec/sftp.py | 58 -
 unstructured/ingest/cli/cmds/github.py | 54 -
 unstructured/ingest/cli/cmds/gitlab.py | 54 -
 unstructured/ingest/cli/cmds/google_drive.py | 49 -
 unstructured/ingest/cli/cmds/hubspot.py | 70 -
 unstructured/ingest/cli/cmds/jira.py | 71 -
 unstructured/ingest/cli/cmds/kafka.py | 102 --
 unstructured/ingest/cli/cmds/local.py | 43 -
 unstructured/ingest/cli/cmds/mongodb.py | 72 -
 unstructured/ingest/cli/cmds/notion.py | 48 -
 unstructured/ingest/cli/cmds/onedrive.py | 66 -
 unstructured/ingest/cli/cmds/opensearch.py | 117 --
 unstructured/ingest/cli/cmds/outlook.py | 67 -
 unstructured/ingest/cli/cmds/pinecone.py | 71 -
 unstructured/ingest/cli/cmds/qdrant.py | 124 --
 unstructured/ingest/cli/cmds/reddit.py | 67 -
 unstructured/ingest/cli/cmds/salesforce.py | 58 -
 unstructured/ingest/cli/cmds/sharepoint.py | 66 -
 unstructured/ingest/cli/cmds/slack.py | 56 -
 unstructured/ingest/cli/cmds/sql.py | 66 -
 unstructured/ingest/cli/cmds/vectara.py | 66 -
 unstructured/ingest/cli/cmds/weaviate.py | 98 --
 unstructured/ingest/cli/cmds/wikipedia.py | 40 -
 unstructured/ingest/cli/common.py | 7 -
 unstructured/ingest/cli/interfaces.py | 656 --------
 unstructured/ingest/cli/utils.py | 205 ---
 unstructured/ingest/connector/__init__.py | 0
 unstructured/ingest/connector/airtable.py | 309 ----
 unstructured/ingest/connector/astradb.py | 238 ---
 .../connector/azure_cognitive_search.py | 142 --
 unstructured/ingest/connector/biomed.py | 313 ----
 unstructured/ingest/connector/chroma.py | 159 --
 unstructured/ingest/connector/clarifai.py | 122 --
 unstructured/ingest/connector/confluence.py | 285 ----
 .../ingest/connector/databricks_volumes.py | 137 --
 unstructured/ingest/connector/delta_table.py | 203 ---
 unstructured/ingest/connector/discord.py | 180 ---
 .../ingest/connector/elasticsearch.py | 397 -----
 .../ingest/connector/fsspec/__init__.py | 0
 unstructured/ingest/connector/fsspec/azure.py | 78 -
 unstructured/ingest/connector/fsspec/box.py | 109 --
 .../ingest/connector/fsspec/dropbox.py | 160 --
 .../ingest/connector/fsspec/fsspec.py | 359 -----
 unstructured/ingest/connector/fsspec/gcs.py | 82 -
 unstructured/ingest/connector/fsspec/s3.py | 62 -
 unstructured/ingest/connector/fsspec/sftp.py | 81 -
 unstructured/ingest/connector/git.py | 124 --
 unstructured/ingest/connector/github.py | 173 --
 unstructured/ingest/connector/gitlab.py | 142 --
 unstructured/ingest/connector/google_drive.py | 348 ----
 unstructured/ingest/connector/hubspot.py | 278 ----
 unstructured/ingest/connector/jira.py | 469 ------
 unstructured/ingest/connector/kafka.py | 294 ----
 unstructured/ingest/connector/local.py | 139 --
 unstructured/ingest/connector/mongodb.py | 284 ----
 .../ingest/connector/notion/__init__.py | 0
 .../ingest/connector/notion/client.py | 233 ---
 .../ingest/connector/notion/connector.py | 468 ------
 .../ingest/connector/notion/helpers.py | 584 -------
 .../ingest/connector/notion/interfaces.py | 32 -
 .../ingest/connector/notion/types/__init__.py | 0
 .../ingest/connector/notion/types/block.py | 95 --
 .../connector/notion/types/blocks/__init__.py | 63 -
 .../connector/notion/types/blocks/bookmark.py | 40 -
 .../notion/types/blocks/breadcrumb.py | 21 -
 .../notion/types/blocks/bulleted_list_item.py | 31 -
 .../connector/notion/types/blocks/callout.py | 94 --
 .../notion/types/blocks/child_database.py | 23 -
 .../notion/types/blocks/child_page.py | 23 -
 .../connector/notion/types/blocks/code.py | 43 -
 .../notion/types/blocks/column_list.py | 35 -
 .../connector/notion/types/blocks/divider.py | 22 -
 .../connector/notion/types/blocks/embed.py | 36 -
 .../connector/notion/types/blocks/equation.py | 23 -
 .../connector/notion/types/blocks/file.py | 49 -
 .../connector/notion/types/blocks/heading.py | 37 -
 .../connector/notion/types/blocks/image.py | 21 -
 .../notion/types/blocks/link_preview.py | 24 -
 .../notion/types/blocks/link_to_page.py | 29 -
 .../notion/types/blocks/numbered_list.py | 29 -
 .../notion/types/blocks/paragraph.py | 31 -
 .../connector/notion/types/blocks/pdf.py | 49 -
 .../connector/notion/types/blocks/quote.py | 37 -
 .../notion/types/blocks/synced_block.py | 57 -
 .../connector/notion/types/blocks/table.py | 63 -
 .../notion/types/blocks/table_of_contents.py | 23 -
 .../connector/notion/types/blocks/template.py | 30 -
 .../connector/notion/types/blocks/todo.py | 42 -
 .../connector/notion/types/blocks/toggle.py | 37 -
 .../notion/types/blocks/unsupported.py | 20 -
 .../connector/notion/types/blocks/video.py | 22 -
 .../ingest/connector/notion/types/database.py | 72 -
 .../types/database_properties/__init__.py | 106 --
 .../types/database_properties/checkbox.py | 38 -
 .../types/database_properties/created_by.py | 35 -
 .../types/database_properties/created_time.py | 34 -
 .../notion/types/database_properties/date.py | 41 -
 .../notion/types/database_properties/email.py | 36 -
 .../notion/types/database_properties/files.py | 37 -
 .../types/database_properties/formula.py | 49 -
 .../database_properties/last_edited_by.py | 34 -
 .../database_properties/last_edited_time.py | 34 -
 .../types/database_properties/multiselect.py | 73 -
 .../types/database_properties/number.py | 49 -
 .../types/database_properties/people.py | 40 -
 .../types/database_properties/phone_number.py | 36 -
 .../types/database_properties/relation.py | 67 -
 .../types/database_properties/rich_text.py | 43 -
 .../types/database_properties/rollup.py | 56 -
 .../types/database_properties/select.py | 68 -
 .../types/database_properties/status.py | 80 -
 .../notion/types/database_properties/title.py | 37 -
 .../types/database_properties/unique_id.py | 50 -
 .../notion/types/database_properties/url.py | 37 -
 .../types/database_properties/verification.py | 78 -
 .../ingest/connector/notion/types/date.py | 26 -
 .../ingest/connector/notion/types/file.py | 51 -
 .../ingest/connector/notion/types/page.py | 44 -
 .../ingest/connector/notion/types/parent.py | 66 -
 .../connector/notion/types/rich_text.py | 189 ---
 .../ingest/connector/notion/types/user.py | 76 -
 unstructured/ingest/connector/onedrive.py | 232 ---
 unstructured/ingest/connector/opensearch.py | 219 ---
 unstructured/ingest/connector/outlook.py | 285 ----
 unstructured/ingest/connector/pinecone.py | 142 --
 unstructured/ingest/connector/qdrant.py | 145 --
 unstructured/ingest/connector/reddit.py | 166 --
 unstructured/ingest/connector/registry.py | 109 --
 unstructured/ingest/connector/salesforce.py | 301 ----
 unstructured/ingest/connector/sharepoint.py | 573 -------
 unstructured/ingest/connector/slack.py | 224 ---
 unstructured/ingest/connector/sql.py | 196 ---
 unstructured/ingest/connector/vectara.py | 248 ---
 unstructured/ingest/connector/weaviate.py | 187 ---
 unstructured/ingest/connector/wikipedia.py | 208 ---
 .../ingest/enhanced_dataclass/__init__.py | 4 -
 .../ingest/enhanced_dataclass/core.py | 99 --
 .../ingest/enhanced_dataclass/dataclasses.py | 54 -
 .../ingest/enhanced_dataclass/json_mixin.py | 125 --
 unstructured/ingest/error.py | 49 -
 unstructured/ingest/evaluate.py | 349 ----
 ...structured_ingest_cli_pipeline_diagram.png | Bin 91991 -> 0 bytes
 .../ingest/ingest_backoff/__init__.py | 3 -
 unstructured/ingest/ingest_backoff/_common.py | 102 --
 .../ingest/ingest_backoff/_wrapper.py | 122 --
 unstructured/ingest/interfaces.py | 845 ----------
 unstructured/ingest/logger.py | 130 --
 unstructured/ingest/main.py | 11 -
 unstructured/ingest/pipeline/__init__.py | 22 -
 unstructured/ingest/pipeline/copy.py | 19 -
 unstructured/ingest/pipeline/doc_factory.py | 12 -
 unstructured/ingest/pipeline/interfaces.py | 265 ---
 unstructured/ingest/pipeline/partition.py | 60 -
 unstructured/ingest/pipeline/permissions.py | 12 -
 unstructured/ingest/pipeline/pipeline.py | 117 --
 .../ingest/pipeline/reformat/__init__.py | 0
 .../ingest/pipeline/reformat/chunking.py | 129 --
 .../ingest/pipeline/reformat/embedding.py | 65 -
 unstructured/ingest/pipeline/source.py | 77 -
 unstructured/ingest/pipeline/utils.py | 6 -
 unstructured/ingest/pipeline/write.py | 18 -
 unstructured/ingest/processor.py | 93 --
 unstructured/ingest/runner/__init__.py | 104 --
 unstructured/ingest/runner/airtable.py | 35 -
 unstructured/ingest/runner/astradb.py | 34 -
 unstructured/ingest/runner/base_runner.py | 89 --
 unstructured/ingest/runner/biomed.py | 45 -
 unstructured/ingest/runner/confluence.py | 35 -
 unstructured/ingest/runner/delta_table.py | 34 -
 unstructured/ingest/runner/discord.py | 35 -
 unstructured/ingest/runner/elasticsearch.py | 40 -
 unstructured/ingest/runner/fsspec/__init__.py | 0
 unstructured/ingest/runner/fsspec/azure.py | 30 -
 unstructured/ingest/runner/fsspec/box.py | 28 -
 unstructured/ingest/runner/fsspec/dropbox.py | 30 -
 unstructured/ingest/runner/fsspec/fsspec.py | 40 -
 unstructured/ingest/runner/fsspec/gcs.py | 28 -
 unstructured/ingest/runner/fsspec/s3.py | 28 -
 unstructured/ingest/runner/fsspec/sftp.py | 28 -
 unstructured/ingest/runner/github.py | 37 -
 unstructured/ingest/runner/gitlab.py | 37 -
 unstructured/ingest/runner/google_drive.py | 35 -
 unstructured/ingest/runner/hubspot.py | 35 -
 unstructured/ingest/runner/jira.py | 35 -
 unstructured/ingest/runner/kafka.py | 34 -
 unstructured/ingest/runner/local.py | 23 -
 unstructured/ingest/runner/mongodb.py | 34 -
 unstructured/ingest/runner/notion.py | 61 -
 unstructured/ingest/runner/onedrive.py | 35 -
 unstructured/ingest/runner/opensearch.py | 40 -
 unstructured/ingest/runner/outlook.py | 33 -
 unstructured/ingest/runner/reddit.py | 35 -
 unstructured/ingest/runner/salesforce.py | 33 -
 unstructured/ingest/runner/sharepoint.py | 35 -
 unstructured/ingest/runner/slack.py | 33 -
 unstructured/ingest/runner/utils.py | 47 -
 unstructured/ingest/runner/wikipedia.py | 35 -
 .../ingest/runner/writers/__init__.py | 48 -
 unstructured/ingest/runner/writers/astradb.py | 22 -
 .../runner/writers/azure_cognitive_search.py | 24 -
 .../ingest/runner/writers/base_writer.py | 26 -
 unstructured/ingest/runner/writers/chroma.py | 22 -
 .../ingest/runner/writers/clarifai.py | 19 -
 .../runner/writers/databricks_volumes.py | 25 -
 .../ingest/runner/writers/delta_table.py | 24 -
 .../ingest/runner/writers/elasticsearch.py | 24 -
 .../ingest/runner/writers/fsspec/__init__.py | 0
 .../ingest/runner/writers/fsspec/azure.py | 24 -
 .../ingest/runner/writers/fsspec/box.py | 21 -
 .../ingest/runner/writers/fsspec/dropbox.py | 21 -
 .../ingest/runner/writers/fsspec/gcs.py | 19 -
 .../ingest/runner/writers/fsspec/s3.py | 21 -
 unstructured/ingest/runner/writers/kafka.py | 21 -
 unstructured/ingest/runner/writers/mongodb.py | 21 -
 .../ingest/runner/writers/opensearch.py | 26 -
 .../ingest/runner/writers/pinecone.py | 21 -
 unstructured/ingest/runner/writers/qdrant.py | 19 -
 unstructured/ingest/runner/writers/sql.py | 22 -
 unstructured/ingest/runner/writers/vectara.py | 22 -
 .../ingest/runner/writers/weaviate.py | 21 -
 unstructured/ingest/utils/__init__.py | 0
 unstructured/ingest/utils/compression.py | 117 --
 unstructured/ingest/utils/data_prep.py | 29 -
 .../ingest/utils/string_and_date_utils.py | 39 -
 unstructured/ingest/utils/table.py | 24 -
 unstructured/ingest/v2/README.md | 6 -
 unstructured/ingest/v2/__init__.py | 1 -
 .../ingest/v2/assets/pipeline.excalidraw | 1417 -----------------
 unstructured/ingest/v2/assets/pipeline.png | Bin 305326 -> 0 bytes
 unstructured/ingest/v2/assets/sequence.png | Bin 346247 -> 0 bytes
 unstructured/ingest/v2/assets/sequence.txt | 38 -
 unstructured/ingest/v2/cli/README.md | 28 -
 unstructured/ingest/v2/cli/__init__.py | 0
 unstructured/ingest/v2/cli/base/__init__.py | 4 -
 unstructured/ingest/v2/cli/base/cmd.py | 215 ---
 unstructured/ingest/v2/cli/base/dest.py | 76 -
 unstructured/ingest/v2/cli/base/importer.py | 34 -
 unstructured/ingest/v2/cli/base/src.py | 70 -
 unstructured/ingest/v2/cli/cli.py | 24 -
 unstructured/ingest/v2/cli/cmds/__init__.py | 87 -
 unstructured/ingest/v2/cli/cmds/astradb.py | 85 -
 .../v2/cli/cmds/azure_cognitive_search.py | 72 -
 unstructured/ingest/v2/cli/cmds/chroma.py | 108 --
 .../ingest/v2/cli/cmds/databricks_volumes.py | 161 --
 .../ingest/v2/cli/cmds/elasticsearch.py | 159 --
 .../ingest/v2/cli/cmds/fsspec/__init__.py | 0
 .../ingest/v2/cli/cmds/fsspec/azure.py | 84 -
 unstructured/ingest/v2/cli/cmds/fsspec/box.py | 58 -
 .../ingest/v2/cli/cmds/fsspec/dropbox.py | 58 -
 .../ingest/v2/cli/cmds/fsspec/fsspec.py | 77 -
 unstructured/ingest/v2/cli/cmds/fsspec/gcs.py | 81 -
 unstructured/ingest/v2/cli/cmds/fsspec/s3.py | 84 -
 .../ingest/v2/cli/cmds/fsspec/sftp.py | 80 -
 .../ingest/v2/cli/cmds/google_drive.py | 74 -
 unstructured/ingest/v2/cli/cmds/local.py | 60 -
 unstructured/ingest/v2/cli/cmds/mongodb.py | 62 -
 unstructured/ingest/v2/cli/cmds/onedrive.py | 91 --
 unstructured/ingest/v2/cli/cmds/opensearch.py | 93 --
 unstructured/ingest/v2/cli/cmds/pinecone.py | 62 -
 unstructured/ingest/v2/cli/cmds/salesforce.py | 79 -
 unstructured/ingest/v2/cli/cmds/sharepoint.py | 112 --
 .../ingest/v2/cli/cmds/singlestore.py | 96 --
 unstructured/ingest/v2/cli/cmds/sql.py | 84 -
 unstructured/ingest/v2/cli/cmds/weaviate.py | 100 --
 .../ingest/v2/cli/configs/__init__.py | 6 -
 unstructured/ingest/v2/cli/configs/chunk.py | 89 --
 unstructured/ingest/v2/cli/configs/embed.py | 74 -
 .../ingest/v2/cli/configs/partition.py | 99 --
 .../ingest/v2/cli/configs/processor.py | 88 -
 unstructured/ingest/v2/cli/interfaces.py | 27 -
 unstructured/ingest/v2/cli/utils.py | 240 ---
 unstructured/ingest/v2/example.py | 37 -
 .../example_azure_cognitive_search.py | 52 -
 .../ingest/v2/examples/example_chroma.py | 53 -
 .../v2/examples/example_databricks_volumes.py | 54 -
 .../v2/examples/example_elasticsearch.py | 48 -
 .../ingest/v2/examples/example_local.py | 35 -
 .../ingest/v2/examples/example_mongodb.py | 52 -
 .../ingest/v2/examples/example_opensearch.py | 51 -
 .../ingest/v2/examples/example_pinecone.py | 56 -
 unstructured/ingest/v2/examples/example_s3.py | 35 -
 .../ingest/v2/examples/example_salesforce.py | 43 -
 .../ingest/v2/examples/example_sharepoint.py | 46 -
 .../ingest/v2/examples/example_singlestore.py | 48 -
 .../ingest/v2/examples/example_sql.py | 88 -
 .../ingest/v2/examples/example_weaviate.py | 44 -
 unstructured/ingest/v2/interfaces/__init__.py | 29 -
 .../ingest/v2/interfaces/connector.py | 32 -
 .../ingest/v2/interfaces/downloader.py | 79 -
 .../ingest/v2/interfaces/file_data.py | 56 -
 unstructured/ingest/v2/interfaces/indexer.py | 28 -
 unstructured/ingest/v2/interfaces/process.py | 20 -
 .../ingest/v2/interfaces/processor.py | 48 -
 .../ingest/v2/interfaces/upload_stager.py | 48 -
 unstructured/ingest/v2/interfaces/uploader.py | 39 -
 unstructured/ingest/v2/logger.py | 123 --
 unstructured/ingest/v2/main.py | 11 -
 unstructured/ingest/v2/pipeline/__init__.py | 0
 unstructured/ingest/v2/pipeline/interfaces.py | 169 --
 unstructured/ingest/v2/pipeline/pipeline.py | 286 ----
 .../ingest/v2/pipeline/steps/__init__.py | 0
 .../ingest/v2/pipeline/steps/chunk.py | 84 -
 .../ingest/v2/pipeline/steps/download.py | 124 --
 .../ingest/v2/pipeline/steps/embed.py | 83 -
 .../ingest/v2/pipeline/steps/index.py | 65 -
 .../ingest/v2/pipeline/steps/partition.py | 78 -
 .../ingest/v2/pipeline/steps/stage.py | 64 -
 .../ingest/v2/pipeline/steps/uncompress.py | 68 -
 .../ingest/v2/pipeline/steps/upload.py | 73 -
 unstructured/ingest/v2/pipeline/utils.py | 16 -
 unstructured/ingest/v2/processes/__init__.py | 0
 unstructured/ingest/v2/processes/chunker.py | 96 --
 .../ingest/v2/processes/connector_registry.py | 63 -
 .../v2/processes/connectors/__init__.py | 76 -
 .../ingest/v2/processes/connectors/astradb.py | 151 --
 .../connectors/azure_cognitive_search.py | 208 ---
 .../ingest/v2/processes/connectors/chroma.py | 208 ---
 .../connectors/databricks_volumes.py | 96 --
 .../v2/processes/connectors/elasticsearch.py | 401 -----
 .../processes/connectors/fsspec/__init__.py | 37 -
 .../v2/processes/connectors/fsspec/azure.py | 144 --
 .../v2/processes/connectors/fsspec/box.py | 131 --
 .../v2/processes/connectors/fsspec/dropbox.py | 130 --
 .../v2/processes/connectors/fsspec/fsspec.py | 344 ----
 .../v2/processes/connectors/fsspec/gcs.py | 141 --
 .../v2/processes/connectors/fsspec/s3.py | 163 --
 .../v2/processes/connectors/fsspec/sftp.py | 166 --
 .../v2/processes/connectors/fsspec/utils.py | 17 -
 .../v2/processes/connectors/google_drive.py | 335 ----
 .../ingest/v2/processes/connectors/local.py | 203 ---
 .../ingest/v2/processes/connectors/mongodb.py | 137 --
 .../v2/processes/connectors/onedrive.py | 218 ---
 .../v2/processes/connectors/opensearch.py | 155 --
 .../v2/processes/connectors/pinecone.py | 178 ---
 .../v2/processes/connectors/salesforce.py | 293 ----
 .../v2/processes/connectors/sharepoint.py | 411 -----
 .../v2/processes/connectors/singlestore.py | 160 --
 .../ingest/v2/processes/connectors/sql.py | 265 ---
 .../ingest/v2/processes/connectors/utils.py | 19 -
 .../v2/processes/connectors/weaviate.py | 232 ---
 unstructured/ingest/v2/processes/embedder.py | 82 -
 .../ingest/v2/processes/partitioner.py | 165 --
 .../ingest/v2/processes/uncompress.py | 43 -
 unstructured/utils.py | 31 -
 608 files changed, 943 insertions(+), 42409 deletions(-)

delete mode 100644 requirements/cache.txt
delete mode 100644 requirements/ingest/airtable.in
delete mode 100644 requirements/ingest/airtable.txt
delete mode 100644 requirements/ingest/astradb.in
delete mode 100644 requirements/ingest/astradb.txt
delete mode 100644 requirements/ingest/azure-cognitive-search.in
delete mode 100644 requirements/ingest/azure-cognitive-search.txt
delete mode 100644 requirements/ingest/azure.in
delete mode 100644 requirements/ingest/azure.txt
delete mode 100644 requirements/ingest/biomed.in
delete mode 100644 requirements/ingest/biomed.txt
delete mode 100644 requirements/ingest/box.in
delete mode 100644 requirements/ingest/box.txt
delete mode 100644 requirements/ingest/chroma.in
delete mode 100644 requirements/ingest/chroma.txt
delete mode 100644 requirements/ingest/clarifai.in
delete mode 100644 requirements/ingest/clarifai.txt
delete mode 100644 requirements/ingest/confluence.in
delete mode 100644 requirements/ingest/confluence.txt
delete mode 100644 requirements/ingest/databricks-volumes.in
delete mode 100644 requirements/ingest/databricks-volumes.txt
delete mode 100644 requirements/ingest/delta-table.in
delete mode 100644 requirements/ingest/delta-table.txt
delete mode 100644 requirements/ingest/discord.in
delete mode 100644 requirements/ingest/discord.txt
delete mode 100644 requirements/ingest/dropbox.in
delete mode 100644 requirements/ingest/dropbox.txt
delete mode 100644 requirements/ingest/elasticsearch.in
delete mode 100644 requirements/ingest/elasticsearch.txt
delete mode 100644 requirements/ingest/embed-aws-bedrock.in
delete mode 100644 requirements/ingest/embed-aws-bedrock.txt
delete mode 100644 requirements/ingest/embed-huggingface.in
delete mode 100644 requirements/ingest/embed-huggingface.txt
delete mode 100644 requirements/ingest/embed-mixedbreadai.in
delete mode 100644 requirements/ingest/embed-mixedbreadai.txt
delete mode 100644 requirements/ingest/embed-octoai.in
delete mode 100644 requirements/ingest/embed-octoai.txt
delete mode 100644 requirements/ingest/embed-openai.in
delete mode 100644 requirements/ingest/embed-openai.txt
delete mode 100644 requirements/ingest/embed-vertexai.in
delete mode 100644 requirements/ingest/embed-vertexai.txt
delete mode 100644 requirements/ingest/embed-voyageai.in
delete mode 100644 requirements/ingest/embed-voyageai.txt
delete mode 100644 requirements/ingest/gcs.in
delete mode 100644 requirements/ingest/gcs.txt
delete mode 100644 requirements/ingest/github.in
delete mode 100644 requirements/ingest/github.txt
delete mode 100644 requirements/ingest/gitlab.in
delete mode 100644 requirements/ingest/gitlab.txt
delete mode 100644 requirements/ingest/google-drive.in
delete mode 100644 requirements/ingest/google-drive.txt
delete mode 100644 requirements/ingest/hubspot.in
delete mode 100644 requirements/ingest/hubspot.txt
create mode 100644 requirements/ingest/ingest.txt
delete mode 100644 requirements/ingest/jira.in
delete mode 100644 requirements/ingest/jira.txt
delete mode 100644 requirements/ingest/kafka.in
delete mode 100644 requirements/ingest/kafka.txt
delete mode 100644 requirements/ingest/mongodb.in
delete mode 100644 requirements/ingest/mongodb.txt
delete mode 100644 requirements/ingest/notion.in
delete mode 100644 requirements/ingest/notion.txt
delete mode 100644 requirements/ingest/onedrive.in
delete mode 100644 requirements/ingest/onedrive.txt
delete mode 100644 requirements/ingest/opensearch.in
delete mode 100644 requirements/ingest/opensearch.txt
delete mode 100644 requirements/ingest/outlook.in
delete mode 100644 requirements/ingest/outlook.txt
delete mode 100644 requirements/ingest/pinecone.in
delete mode 100644 requirements/ingest/pinecone.txt
delete mode 100644 requirements/ingest/postgres.in
delete mode 100644 requirements/ingest/postgres.txt
delete mode 100644 requirements/ingest/qdrant.in
delete mode 100644 requirements/ingest/qdrant.txt
delete mode 100644 requirements/ingest/reddit.in
delete mode 100644 requirements/ingest/reddit.txt
delete mode 100644 requirements/ingest/s3.in
delete mode 100644 requirements/ingest/s3.txt
delete mode 100644 requirements/ingest/salesforce.in
delete mode 100644 requirements/ingest/salesforce.txt
delete mode 100644 requirements/ingest/sftp.in
delete mode 100644 requirements/ingest/sftp.txt
delete mode 100644 requirements/ingest/sharepoint.in
delete mode 100644 requirements/ingest/sharepoint.txt
delete mode 100644 requirements/ingest/singlestore.in
delete mode 100644 requirements/ingest/singlestore.txt
delete mode 100644 requirements/ingest/slack.in
delete mode 100644 requirements/ingest/slack.txt
delete mode 100644 requirements/ingest/weaviate.in
delete mode 100644 requirements/ingest/weaviate.txt
delete mode 100644 requirements/ingest/wikipedia.in
delete mode 100644 requirements/ingest/wikipedia.txt
delete mode 100644 test_unstructured/ingest/utils/test_compression.py
delete mode 100755 test_unstructured_ingest/dest/astradb.sh
delete mode 100755 test_unstructured_ingest/dest/azure-cognitive-search.sh
delete mode 100755 test_unstructured_ingest/dest/azure.sh
delete mode 100755 test_unstructured_ingest/dest/box.sh
delete mode 100755 test_unstructured_ingest/dest/chroma.sh
delete mode 100755 test_unstructured_ingest/dest/clarifai.sh
delete mode 100755 test_unstructured_ingest/dest/databricks-volumes.sh
delete mode 100755 test_unstructured_ingest/dest/delta-table.sh
delete mode 100755 test_unstructured_ingest/dest/dropbox.sh
delete mode 100755 test_unstructured_ingest/dest/elasticsearch.sh
delete mode 100755 test_unstructured_ingest/dest/gcs.sh
delete mode 100755 test_unstructured_ingest/dest/kafka-local.sh
delete mode 100755 test_unstructured_ingest/dest/mongodb.sh
delete mode 100755 test_unstructured_ingest/dest/opensearch.sh
delete mode 100755 test_unstructured_ingest/dest/pgvector.sh
delete mode 100755 test_unstructured_ingest/dest/pinecone.sh
delete mode 100755 test_unstructured_ingest/dest/qdrant.sh
delete mode 100755 test_unstructured_ingest/dest/s3.sh
delete mode 100755 test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh
delete mode 100755 test_unstructured_ingest/dest/singlestore.sh
delete mode 100755 test_unstructured_ingest/dest/sqlite.sh
delete mode 100755 test_unstructured_ingest/dest/vectara.sh
delete mode 100755 test_unstructured_ingest/dest/weaviate.sh
delete mode 100755 test_unstructured_ingest/test-help.sh
delete mode 100644 test_unstructured_ingest/unit/cli/test_cli.py
delete mode 100644 test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py
delete mode 100644 test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py
delete mode 100644 test_unstructured_ingest/unit/connector/fsspec/test_paths.py
delete mode 100644 test_unstructured_ingest/unit/connector/test_connector_git.py
delete mode 100644 test_unstructured_ingest/unit/connector/test_salesforce_connector.py
delete mode 100644 test_unstructured_ingest/unit/connector/test_serialization.py
delete mode 100644 test_unstructured_ingest/unit/connector/test_sharepoint.py
delete mode 100644 test_unstructured_ingest/unit/connector/test_sql_conform_dict.py
delete mode 100644 test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py
delete mode 100644 test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py
delete mode 100644 test_unstructured_ingest/unit/test_error.py
delete mode 100644 test_unstructured_ingest/unit/test_interfaces.py
delete mode 100644 test_unstructured_ingest/unit/test_logger.py
delete mode 100644 test_unstructured_ingest/unit/test_utils.py
delete mode 100644 unstructured/ingest/README.md
delete mode 100644 unstructured/ingest/__init__.py
delete mode 100644 unstructured/ingest/cli/__init__.py
delete mode 100644 unstructured/ingest/cli/base/__init__.py
delete mode 100644 unstructured/ingest/cli/base/cmd.py
delete mode 100644 unstructured/ingest/cli/base/dest.py
delete mode 100644 unstructured/ingest/cli/base/src.py
delete mode 100644 unstructured/ingest/cli/cli.py
delete mode 100644 unstructured/ingest/cli/cmd_factory.py
delete mode 100644 unstructured/ingest/cli/cmds/__init__.py
delete mode 100644 unstructured/ingest/cli/cmds/airtable.py
delete mode 100644 unstructured/ingest/cli/cmds/astradb.py
delete mode 100644 unstructured/ingest/cli/cmds/azure_cognitive_search.py
delete mode 100644 unstructured/ingest/cli/cmds/biomed.py
delete mode 100644 unstructured/ingest/cli/cmds/chroma.py
delete mode 100644 unstructured/ingest/cli/cmds/clarifai.py
delete mode 100644 unstructured/ingest/cli/cmds/confluence.py
delete mode 100644 unstructured/ingest/cli/cmds/databricks_volumes.py
delete mode 100644 unstructured/ingest/cli/cmds/delta_table.py
delete mode 100644 unstructured/ingest/cli/cmds/discord.py
delete mode 100644 unstructured/ingest/cli/cmds/elasticsearch.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/__init__.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/azure.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/box.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/dropbox.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/fsspec.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/gcs.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/s3.py
delete mode 100644 unstructured/ingest/cli/cmds/fsspec/sftp.py
delete mode 100644 unstructured/ingest/cli/cmds/github.py
delete mode 100644 unstructured/ingest/cli/cmds/gitlab.py
delete mode 100644 unstructured/ingest/cli/cmds/google_drive.py
delete mode 100644 unstructured/ingest/cli/cmds/hubspot.py
delete mode 100644 unstructured/ingest/cli/cmds/jira.py
delete mode 100644 unstructured/ingest/cli/cmds/kafka.py
delete mode 100644 unstructured/ingest/cli/cmds/local.py
delete mode 100644 unstructured/ingest/cli/cmds/mongodb.py
delete mode 100644 unstructured/ingest/cli/cmds/notion.py
delete mode 100644 unstructured/ingest/cli/cmds/onedrive.py
delete mode 100644 unstructured/ingest/cli/cmds/opensearch.py
delete mode 100644 unstructured/ingest/cli/cmds/outlook.py
delete mode 100644 unstructured/ingest/cli/cmds/pinecone.py
delete mode 100644 unstructured/ingest/cli/cmds/qdrant.py
delete mode 100644 unstructured/ingest/cli/cmds/reddit.py
delete mode 100644 unstructured/ingest/cli/cmds/salesforce.py
delete mode 100644 unstructured/ingest/cli/cmds/sharepoint.py
delete mode 100644 unstructured/ingest/cli/cmds/slack.py
delete mode 100644 unstructured/ingest/cli/cmds/sql.py
delete mode 100644 unstructured/ingest/cli/cmds/vectara.py
delete mode 100644 unstructured/ingest/cli/cmds/weaviate.py
delete mode 100644 unstructured/ingest/cli/cmds/wikipedia.py
delete mode 100644 unstructured/ingest/cli/common.py
delete mode 100644 unstructured/ingest/cli/interfaces.py
delete mode 100644 unstructured/ingest/cli/utils.py
delete mode 100644 unstructured/ingest/connector/__init__.py
delete mode 100644 unstructured/ingest/connector/airtable.py
delete mode 100644 unstructured/ingest/connector/astradb.py
delete mode 100644 unstructured/ingest/connector/azure_cognitive_search.py
delete mode 100644 unstructured/ingest/connector/biomed.py
delete mode 100644 unstructured/ingest/connector/chroma.py
delete mode 100644 unstructured/ingest/connector/clarifai.py
delete mode 100644 unstructured/ingest/connector/confluence.py
delete mode 100644 unstructured/ingest/connector/databricks_volumes.py
delete mode 100644 unstructured/ingest/connector/delta_table.py
delete mode 100644 unstructured/ingest/connector/discord.py
delete mode 100644 unstructured/ingest/connector/elasticsearch.py
delete mode 100644 unstructured/ingest/connector/fsspec/__init__.py
delete mode 100644 unstructured/ingest/connector/fsspec/azure.py
delete mode 100644 unstructured/ingest/connector/fsspec/box.py
delete mode 100644 unstructured/ingest/connector/fsspec/dropbox.py
delete mode 100644 unstructured/ingest/connector/fsspec/fsspec.py
delete mode 100644 unstructured/ingest/connector/fsspec/gcs.py
delete mode 100644 unstructured/ingest/connector/fsspec/s3.py
delete mode 100644 unstructured/ingest/connector/fsspec/sftp.py
delete mode 100644 unstructured/ingest/connector/git.py
delete mode 100644 unstructured/ingest/connector/github.py
delete mode 100644 unstructured/ingest/connector/gitlab.py
delete mode 100644 unstructured/ingest/connector/google_drive.py
delete mode 100644 unstructured/ingest/connector/hubspot.py
delete mode 100644 unstructured/ingest/connector/jira.py
delete mode 100644 unstructured/ingest/connector/kafka.py
delete mode 100644 unstructured/ingest/connector/local.py
delete mode 100644 unstructured/ingest/connector/mongodb.py
delete mode 100644 unstructured/ingest/connector/notion/__init__.py
delete mode 100644 unstructured/ingest/connector/notion/client.py
delete mode 100644 unstructured/ingest/connector/notion/connector.py
delete mode 100644 unstructured/ingest/connector/notion/helpers.py
delete mode 100644 unstructured/ingest/connector/notion/interfaces.py
delete mode 100644 unstructured/ingest/connector/notion/types/__init__.py
delete mode 100644 unstructured/ingest/connector/notion/types/block.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/__init__.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/bookmark.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/breadcrumb.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/callout.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/child_database.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/child_page.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/code.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/column_list.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/divider.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/embed.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/equation.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/file.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/heading.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/image.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/link_preview.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/link_to_page.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/numbered_list.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/paragraph.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/pdf.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/quote.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/synced_block.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/table.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/table_of_contents.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/template.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/todo.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/toggle.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/unsupported.py
delete mode 100644 unstructured/ingest/connector/notion/types/blocks/video.py
delete mode 100644 unstructured/ingest/connector/notion/types/database.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/__init__.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/checkbox.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/created_by.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/created_time.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/date.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/email.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/files.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/formula.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/multiselect.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/number.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/people.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/phone_number.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/relation.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/rich_text.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/rollup.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/select.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/status.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/title.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/unique_id.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/url.py
delete mode 100644 unstructured/ingest/connector/notion/types/database_properties/verification.py
delete mode 100644 unstructured/ingest/connector/notion/types/date.py
delete mode 100644 unstructured/ingest/connector/notion/types/file.py
delete mode 100644 unstructured/ingest/connector/notion/types/page.py
delete mode 100644 unstructured/ingest/connector/notion/types/parent.py
delete mode 100644 unstructured/ingest/connector/notion/types/rich_text.py
delete mode 100644 unstructured/ingest/connector/notion/types/user.py
delete mode 100644 unstructured/ingest/connector/onedrive.py
delete mode 100644 unstructured/ingest/connector/opensearch.py
delete mode 100644 unstructured/ingest/connector/outlook.py
delete mode 100644 unstructured/ingest/connector/pinecone.py
delete mode 100644 unstructured/ingest/connector/qdrant.py
delete mode 100644 unstructured/ingest/connector/reddit.py
delete mode 100644 unstructured/ingest/connector/registry.py
delete mode 100644 unstructured/ingest/connector/salesforce.py
delete mode 100644 unstructured/ingest/connector/sharepoint.py
delete mode 100644 unstructured/ingest/connector/slack.py
delete mode 100644 unstructured/ingest/connector/sql.py
delete mode 100644 unstructured/ingest/connector/vectara.py
delete mode 100644 unstructured/ingest/connector/weaviate.py
delete mode 100644 unstructured/ingest/connector/wikipedia.py
delete mode 100644 unstructured/ingest/enhanced_dataclass/__init__.py
delete mode 100644 unstructured/ingest/enhanced_dataclass/core.py
delete mode 100644 unstructured/ingest/enhanced_dataclass/dataclasses.py
delete mode 100644 unstructured/ingest/enhanced_dataclass/json_mixin.py
delete mode 100644 unstructured/ingest/error.py
delete mode 100755 unstructured/ingest/evaluate.py
delete mode 100644 unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png
delete mode 100644 unstructured/ingest/ingest_backoff/__init__.py
delete mode 100644 unstructured/ingest/ingest_backoff/_common.py
delete mode 100644 unstructured/ingest/ingest_backoff/_wrapper.py
delete mode 100644 unstructured/ingest/interfaces.py
delete mode 100644 unstructured/ingest/logger.py
delete mode 100755 unstructured/ingest/main.py
delete mode 100644 unstructured/ingest/pipeline/__init__.py
delete mode 100644 unstructured/ingest/pipeline/copy.py
delete mode 100644 unstructured/ingest/pipeline/doc_factory.py
delete mode 100644 unstructured/ingest/pipeline/interfaces.py
delete mode 100644 unstructured/ingest/pipeline/partition.py
delete mode 100644 unstructured/ingest/pipeline/permissions.py
delete mode 100644 unstructured/ingest/pipeline/pipeline.py
delete mode 100644 unstructured/ingest/pipeline/reformat/__init__.py
delete mode 100644 unstructured/ingest/pipeline/reformat/chunking.py
delete mode 100644 unstructured/ingest/pipeline/reformat/embedding.py
delete mode 100644 unstructured/ingest/pipeline/source.py
delete mode 100644 unstructured/ingest/pipeline/utils.py
delete mode 100644 unstructured/ingest/pipeline/write.py
delete mode 100644 unstructured/ingest/processor.py
delete mode 100644 unstructured/ingest/runner/__init__.py
delete mode 100644 unstructured/ingest/runner/airtable.py
delete mode 100644 unstructured/ingest/runner/astradb.py
delete mode 100644 unstructured/ingest/runner/base_runner.py
delete mode 100644 unstructured/ingest/runner/biomed.py
delete mode 100644 unstructured/ingest/runner/confluence.py
delete mode 100644 unstructured/ingest/runner/delta_table.py
delete mode 100644 unstructured/ingest/runner/discord.py
delete mode 100644 unstructured/ingest/runner/elasticsearch.py
delete mode 100644 unstructured/ingest/runner/fsspec/__init__.py
delete mode 100644 unstructured/ingest/runner/fsspec/azure.py
delete mode 100644 unstructured/ingest/runner/fsspec/box.py
delete mode 100644 unstructured/ingest/runner/fsspec/dropbox.py
delete mode 100644 unstructured/ingest/runner/fsspec/fsspec.py
delete mode 100644 unstructured/ingest/runner/fsspec/gcs.py
delete mode 100644 unstructured/ingest/runner/fsspec/s3.py
delete mode 100644 unstructured/ingest/runner/fsspec/sftp.py
delete mode 100644 unstructured/ingest/runner/github.py
delete mode 100644 unstructured/ingest/runner/gitlab.py
delete mode 100644 unstructured/ingest/runner/google_drive.py
delete mode 100644 unstructured/ingest/runner/hubspot.py
delete mode 100644 unstructured/ingest/runner/jira.py
delete mode 100644 unstructured/ingest/runner/kafka.py
delete mode 100644 unstructured/ingest/runner/local.py
delete mode 100644 unstructured/ingest/runner/mongodb.py
delete mode 100644 unstructured/ingest/runner/notion.py
delete mode 100644 unstructured/ingest/runner/onedrive.py
delete mode 100644 unstructured/ingest/runner/opensearch.py
delete mode 100644 unstructured/ingest/runner/outlook.py
delete mode 100644 unstructured/ingest/runner/reddit.py
delete mode 100644 unstructured/ingest/runner/salesforce.py
delete mode 100644 unstructured/ingest/runner/sharepoint.py
delete mode 100644 unstructured/ingest/runner/slack.py
delete mode 100644 unstructured/ingest/runner/utils.py
delete mode 100644 unstructured/ingest/runner/wikipedia.py
delete mode 100644 unstructured/ingest/runner/writers/__init__.py
delete mode 100644 unstructured/ingest/runner/writers/astradb.py
delete mode 100644 unstructured/ingest/runner/writers/azure_cognitive_search.py
delete mode 100644 unstructured/ingest/runner/writers/base_writer.py
delete mode 100644 unstructured/ingest/runner/writers/chroma.py
delete mode 100644 unstructured/ingest/runner/writers/clarifai.py
delete mode 100644 unstructured/ingest/runner/writers/databricks_volumes.py
delete mode 100644 unstructured/ingest/runner/writers/delta_table.py
delete mode 100644 unstructured/ingest/runner/writers/elasticsearch.py
delete mode 100644 unstructured/ingest/runner/writers/fsspec/__init__.py
delete mode 100644 unstructured/ingest/runner/writers/fsspec/azure.py
delete mode 100644 unstructured/ingest/runner/writers/fsspec/box.py
delete mode 100644 unstructured/ingest/runner/writers/fsspec/dropbox.py
delete mode 100644 unstructured/ingest/runner/writers/fsspec/gcs.py
delete mode 100644 unstructured/ingest/runner/writers/fsspec/s3.py
delete mode 100644 unstructured/ingest/runner/writers/kafka.py
delete mode 100644 unstructured/ingest/runner/writers/mongodb.py
delete mode 100644 unstructured/ingest/runner/writers/opensearch.py
delete mode 100644 unstructured/ingest/runner/writers/pinecone.py
delete mode 100644 unstructured/ingest/runner/writers/qdrant.py
delete mode 100644 unstructured/ingest/runner/writers/sql.py
delete mode 100644 unstructured/ingest/runner/writers/vectara.py
delete mode 100644 unstructured/ingest/runner/writers/weaviate.py
delete mode 100644 unstructured/ingest/utils/__init__.py
delete mode 100644 unstructured/ingest/utils/compression.py
delete mode 100644 unstructured/ingest/utils/data_prep.py
delete mode 100644 unstructured/ingest/utils/string_and_date_utils.py
delete mode 100644 unstructured/ingest/utils/table.py
delete mode 100644 unstructured/ingest/v2/README.md
delete mode 100644 unstructured/ingest/v2/__init__.py
delete mode 100644 unstructured/ingest/v2/assets/pipeline.excalidraw
delete mode 100644 unstructured/ingest/v2/assets/pipeline.png
delete mode 100644 unstructured/ingest/v2/assets/sequence.png
delete mode 100644 unstructured/ingest/v2/assets/sequence.txt
delete mode 100644 unstructured/ingest/v2/cli/README.md
delete mode 100644 unstructured/ingest/v2/cli/__init__.py
delete mode 100644 unstructured/ingest/v2/cli/base/__init__.py
delete mode 100644 unstructured/ingest/v2/cli/base/cmd.py
delete mode 100644 unstructured/ingest/v2/cli/base/dest.py
delete mode 100644 unstructured/ingest/v2/cli/base/importer.py
delete mode 100644 unstructured/ingest/v2/cli/base/src.py
delete mode 100644 unstructured/ingest/v2/cli/cli.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/__init__.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/astradb.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/chroma.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/databricks_volumes.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/elasticsearch.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/__init__.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/azure.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/box.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/gcs.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/s3.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/fsspec/sftp.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/google_drive.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/local.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/mongodb.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/onedrive.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/opensearch.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/pinecone.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/salesforce.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/sharepoint.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/singlestore.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/sql.py
delete mode 100644 unstructured/ingest/v2/cli/cmds/weaviate.py
delete mode 100644 unstructured/ingest/v2/cli/configs/__init__.py
delete mode 100644 unstructured/ingest/v2/cli/configs/chunk.py
delete mode 100644 unstructured/ingest/v2/cli/configs/embed.py
delete mode 100644 unstructured/ingest/v2/cli/configs/partition.py
delete mode 100644 unstructured/ingest/v2/cli/configs/processor.py
delete mode 100644 unstructured/ingest/v2/cli/interfaces.py
delete mode 100644 unstructured/ingest/v2/cli/utils.py
delete mode 100644 unstructured/ingest/v2/example.py
delete mode 100644 unstructured/ingest/v2/examples/example_azure_cognitive_search.py
delete mode 100644 unstructured/ingest/v2/examples/example_chroma.py
delete mode 100644 unstructured/ingest/v2/examples/example_databricks_volumes.py
delete mode 100644 unstructured/ingest/v2/examples/example_elasticsearch.py
delete mode 100644 unstructured/ingest/v2/examples/example_local.py
delete mode 100644 unstructured/ingest/v2/examples/example_mongodb.py
delete mode 100644 unstructured/ingest/v2/examples/example_opensearch.py
delete mode 100644 unstructured/ingest/v2/examples/example_pinecone.py
delete mode 100644 unstructured/ingest/v2/examples/example_s3.py
delete mode 100644 unstructured/ingest/v2/examples/example_salesforce.py
delete mode 100644 unstructured/ingest/v2/examples/example_sharepoint.py
delete mode 100644 unstructured/ingest/v2/examples/example_singlestore.py
delete mode 100644 unstructured/ingest/v2/examples/example_sql.py
delete mode 100644 unstructured/ingest/v2/examples/example_weaviate.py
delete mode 100644 unstructured/ingest/v2/interfaces/__init__.py
delete mode 100644 unstructured/ingest/v2/interfaces/connector.py
delete mode 100644 unstructured/ingest/v2/interfaces/downloader.py
delete mode 100644 unstructured/ingest/v2/interfaces/file_data.py
delete mode 100644 unstructured/ingest/v2/interfaces/indexer.py
delete mode 100644 unstructured/ingest/v2/interfaces/process.py
delete mode 100644 unstructured/ingest/v2/interfaces/processor.py
delete mode 100644 unstructured/ingest/v2/interfaces/upload_stager.py
delete mode 100644 unstructured/ingest/v2/interfaces/uploader.py
delete mode 100644 unstructured/ingest/v2/logger.py
delete mode 100644 unstructured/ingest/v2/main.py
delete mode 100644 unstructured/ingest/v2/pipeline/__init__.py
delete mode 100644 unstructured/ingest/v2/pipeline/interfaces.py
delete mode 100644 unstructured/ingest/v2/pipeline/pipeline.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/__init__.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/chunk.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/download.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/embed.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/index.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/partition.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/stage.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/uncompress.py
delete mode 100644 unstructured/ingest/v2/pipeline/steps/upload.py
delete mode 100644 unstructured/ingest/v2/pipeline/utils.py
delete mode 100644 unstructured/ingest/v2/processes/__init__.py
delete mode 100644 unstructured/ingest/v2/processes/chunker.py
delete mode 100644 unstructured/ingest/v2/processes/connector_registry.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/__init__.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/astradb.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/chroma.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/databricks_volumes.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/elasticsearch.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/__init__.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/azure.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/box.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/gcs.py
delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/s3.py
mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/sftp.py delete mode 100644 unstructured/ingest/v2/processes/connectors/fsspec/utils.py delete mode 100644 unstructured/ingest/v2/processes/connectors/google_drive.py delete mode 100644 unstructured/ingest/v2/processes/connectors/local.py delete mode 100644 unstructured/ingest/v2/processes/connectors/mongodb.py delete mode 100644 unstructured/ingest/v2/processes/connectors/onedrive.py delete mode 100644 unstructured/ingest/v2/processes/connectors/opensearch.py delete mode 100644 unstructured/ingest/v2/processes/connectors/pinecone.py delete mode 100644 unstructured/ingest/v2/processes/connectors/salesforce.py delete mode 100644 unstructured/ingest/v2/processes/connectors/sharepoint.py delete mode 100644 unstructured/ingest/v2/processes/connectors/singlestore.py delete mode 100644 unstructured/ingest/v2/processes/connectors/sql.py delete mode 100644 unstructured/ingest/v2/processes/connectors/utils.py delete mode 100644 unstructured/ingest/v2/processes/connectors/weaviate.py delete mode 100644 unstructured/ingest/v2/processes/embedder.py delete mode 100644 unstructured/ingest/v2/processes/partitioner.py delete mode 100644 unstructured/ingest/v2/processes/uncompress.py diff --git a/.github/actions/base-ingest-cache/action.yml b/.github/actions/base-ingest-cache/action.yml index b83a833cf..f29d86764 100644 --- a/.github/actions/base-ingest-cache/action.yml +++ b/.github/actions/base-ingest-cache/action.yml @@ -39,7 +39,7 @@ runs: python -m pip install --upgrade setuptools fi make install-ci - make install-all-ingest + make install-ingest - name: Save Ingest Cache if: steps.ingest-virtualenv-cache-restore.outputs.cache-hit != 'true' id: ingest-virtualenv-cache-save diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 142578885..12c261ffb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,7 +72,6 @@ jobs: - name: Install all doc and test dependencies run: | make install-ci - make install-all-ingest make check-licenses lint: @@ -273,37 +272,6 @@ jobs: python-version: ${{ matrix.python-version }} check-only: 'true' - test_ingest_unit: - strategy: - matrix: - python-version: [ "3.9","3.10" ] - runs-on: ubuntu-latest - needs: [ setup_ingest, lint ] - steps: - # actions/checkout MUST come before auth - - uses: 'actions/checkout@v4' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Get full Python version - id: full-python-version - run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT - - name: Setup virtual environment - uses: ./.github/actions/base-ingest-cache - with: - python-version: ${{ matrix.python-version }} - - name: Test Ingest (unit) - env: - NLTK_DATA: ${{ github.workspace }}/nltk_data - PYTHON: python${{ matrix.python-version }} - run: | - source .venv/bin/activate - make install-ci - make install-all-ingest - PYTHONPATH=. 
${PYTHON} -m pytest test_unstructured_ingest/unit - - test_ingest_src: strategy: matrix: @@ -378,8 +346,6 @@ jobs: PYTHON: python${{ matrix.python-version }} run: | source .venv/bin/activate - make install-ci - make install-all-ingest sudo apt-get update sudo apt-get install -y libmagic-dev poppler-utils libreoffice make install-pandoc @@ -392,103 +358,6 @@ jobs: ./test_unstructured_ingest/test-ingest-src.sh - test_ingest_dest: - environment: ci - strategy: - matrix: - python-version: ["3.9","3.10"] - runs-on: ubuntu-latest-m - needs: [setup_ingest, lint] - steps: - # actions/checkout MUST come before auth - - uses: 'actions/checkout@v4' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Get full Python version - id: full-python-version - run: echo version=$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") >> $GITHUB_OUTPUT - - name: Setup virtual environment - uses: ./.github/actions/base-ingest-cache - with: - python-version: ${{ matrix.python-version }} - - name: Setup docker-compose - uses: KengoTODA/actions-setup-docker-compose@v1 - with: - version: '2.22.0' - - name: Test (end-to-end) - env: - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - S3_INGEST_TEST_ACCESS_KEY: ${{ secrets.S3_INGEST_TEST_ACCESS_KEY }} - S3_INGEST_TEST_SECRET_KEY: ${{ secrets.S3_INGEST_TEST_SECRET_KEY }} - AZURE_SEARCH_ENDPOINT: ${{ secrets.AZURE_SEARCH_ENDPOINT }} - AZURE_SEARCH_API_KEY: ${{ secrets.AZURE_SEARCH_API_KEY }} - BOX_APP_CONFIG: ${{ secrets.BOX_APP_CONFIG }} - DROPBOX_APP_KEY: ${{ secrets.DROPBOX_APP_KEY }} - DROPBOX_APP_SECRET: ${{ secrets.DROPBOX_APP_SECRET }} - DROPBOX_REFRESH_TOKEN: ${{ secrets.DROPBOX_REFRESH_TOKEN }} - GCP_INGEST_SERVICE_KEY: ${{ secrets.GCP_INGEST_SERVICE_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - MONGODB_URI: ${{ secrets.MONGODB_URI }} - MONGODB_DATABASE_NAME: ${{ secrets.MONGODB_DATABASE_NAME }} - AZURE_DEST_CONNECTION_STR: ${{ secrets.AZURE_DEST_CONNECTION_STR }} - PINECONE_API_KEY: ${{secrets.PINECONE_API_KEY}} - VECTARA_OAUTH_CLIENT_ID: ${{secrets.VECTARA_OAUTH_CLIENT_ID}} - VECTARA_OAUTH_SECRET: ${{secrets.VECTARA_OAUTH_SECRET}} - VECTARA_CUSTOMER_ID: ${{secrets.VECTARA_CUSTOMER_ID}} - ASTRA_DB_APPLICATION_TOKEN: ${{secrets.ASTRA_DB_TOKEN}} - ASTRA_DB_API_ENDPOINT: ${{secrets.ASTRA_DB_ENDPOINT}} - CLARIFAI_API_KEY: ${{secrets.CLARIFAI_API_KEY}} - DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}} - DATABRICKS_USERNAME: ${{secrets.DATABRICKS_USERNAME}} - DATABRICKS_PASSWORD: ${{secrets.DATABRICKS_PASSWORD}} - DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}} - OCR_AGENT: "unstructured.partition.utils.ocr_models.tesseract_ocr.OCRAgentTesseract" - CI: "true" - NLTK_DATA: ${{ github.workspace }}/nltk_data - PYTHON: python${{ matrix.python-version }} - run: | - source .venv/bin/activate - make install-ci - make install-all-ingest - sudo apt-get update - sudo apt-get install -y libmagic-dev poppler-utils libreoffice - make install-pandoc - sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5 - sudo apt-get update - sudo apt-get install -y tesseract-ocr - sudo apt-get install -y tesseract-ocr-kor - sudo apt-get install diffstat - tesseract --version - ./test_unstructured_ingest/test-ingest-dest.sh - - test_ingest_help: - environment: ci - strategy: - matrix: - python-version: ["3.9","3.10","3.11", "3.12"] - runs-on: ubuntu-latest - needs: [setup_ingest, lint] - steps: - - 
uses: 'actions/checkout@v4' - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Setup virtual environment - uses: ./.github/actions/base-ingest-cache - with: - python-version: ${{ matrix.python-version }} - - name: Validate --help - run: | - source .venv/bin/activate - make install-ci - make install-all-ingest - ./test_unstructured_ingest/test-help.sh - - test_unstructured_api_unit: strategy: matrix: diff --git a/.github/workflows/ingest-test-fixtures-update-pr.yml b/.github/workflows/ingest-test-fixtures-update-pr.yml index d22a5aab9..f724e8dfc 100644 --- a/.github/workflows/ingest-test-fixtures-update-pr.yml +++ b/.github/workflows/ingest-test-fixtures-update-pr.yml @@ -105,6 +105,7 @@ jobs: sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5 sudo apt-get install -y tesseract-ocr sudo apt-get install -y tesseract-ocr-kor + sudo apt-get install diffstat tesseract --version ./test_unstructured_ingest/test-ingest-src.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 293ffe581..68ccf02a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,12 @@ -## 0.15.15-dev0 +## 0.16.0 ### Enhancements +* **Remove ingest implementation.** The deprecated ingest functionality has been removed, as it is now maintained in the separate [unstructured-ingest](https://github.com/Unstructured-IO/unstructured-ingest) repository. + * Replace extras in `requirements/ingest` directory with a new `ingest.txt` extra for installing the `unstructured-ingest` library. + * Remove the `unstructured.ingest` submodule. + * Delete all shell scripts previously used for destination ingest tests. + ### Features ### Fixes diff --git a/MANIFEST.in b/MANIFEST.in index e5c39fc29..e4c7d4da5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -15,45 +15,3 @@ include requirements/extra-pptx.in include requirements/extra-xlsx.in include requirements/huggingface.in -# Ingest extras -include requirements/ingest/airtable.in -include requirements/ingest/astradb.in -include requirements/ingest/azure-cognitive-search.in -include requirements/ingest/azure.in -include requirements/ingest/biomed.in -include requirements/ingest/box.in -include requirements/ingest/chroma.in -include requirements/ingest/confluence.in -include requirements/ingest/databricks-volumes.in -include requirements/ingest/delta-table.in -include requirements/ingest/discord.in -include requirements/ingest/dropbox.in -include requirements/ingest/elasticsearch.in -include requirements/ingest/embed-aws-bedrock.in -include requirements/ingest/embed-huggingface.in -include requirements/ingest/embed-mixedbreadai.in -include requirements/ingest/embed-openai.in -include requirements/ingest/gcs.in -include requirements/ingest/github.in -include requirements/ingest/gitlab.in -include requirements/ingest/google-drive.in -include requirements/ingest/hubspot.in -include requirements/ingest/jira.in -include requirements/ingest/kafka.in -include requirements/ingest/mongodb.in -include requirements/ingest/notion.in -include requirements/ingest/onedrive.in -include requirements/ingest/opensearch.in -include requirements/ingest/outlook.in -include requirements/ingest/pinecone.in -include requirements/ingest/postgres.in -include requirements/ingest/qdrant.in -include requirements/ingest/reddit.in -include requirements/ingest/s3.in -include requirements/ingest/salesforce.in -include requirements/ingest/sftp.in -include requirements/ingest/sharepoint.in -include requirements/ingest/slack.in -include 
requirements/ingest/singlestore.in -include requirements/ingest/weaviate.in -include requirements/ingest/wikipedia.in diff --git a/Makefile b/Makefile index d9a3e1803..714992a83 100644 --- a/Makefile +++ b/Makefile @@ -99,171 +99,9 @@ install-xlsx: .PHONY: install-all-docs install-all-docs: install-base install-csv install-docx install-epub install-odt install-pypandoc install-markdown install-pdf-image install-pptx install-xlsx -.PHONY: install-all-ingest -install-all-ingest: - find requirements/ingest -type f -name "*.txt" -exec ${PYTHON} -m pip install -r '{}' ';' - - -.PHONY: install-ingest-google-drive -install-ingest-google-drive: - ${PYTHON} -m pip install -r requirements/ingest/google-drive.txt - -## install-ingest-s3: install requirements for the s3 connector -.PHONY: install-ingest-s3 -install-ingest-s3: - ${PYTHON} -m pip install -r requirements/ingest/s3.txt - -.PHONY: install-ingest-gcs -install-ingest-gcs: - ${PYTHON} -m pip install -r requirements/ingest/gcs.txt - -.PHONY: install-ingest-dropbox -install-ingest-dropbox: - ${PYTHON} -m pip install -r requirements/ingest/dropbox.txt - -.PHONY: install-ingest-azure -install-ingest-azure: - ${PYTHON} -m pip install -r requirements/ingest/azure.txt - -.PHONY: install-ingest-box -install-ingest-box: - ${PYTHON} -m pip install -r requirements/ingest/box.txt - -.PHONY: install-ingest-delta-table -install-ingest-delta-table: - ${PYTHON} -m pip install -r requirements/ingest/delta-table.txt - -.PHONY: install-ingest-discord -install-ingest-discord: - ${PYTHON} -m pip install -r requirements/ingest/discord.txt - -.PHONY: install-ingest-github -install-ingest-github: - ${PYTHON} -m pip install -r requirements/ingest/github.txt - -.PHONY: install-ingest-biomed -install-ingest-biomed: - ${PYTHON} -m pip install -r requirements/ingest/biomed.txt - -.PHONY: install-ingest-gitlab -install-ingest-gitlab: - ${PYTHON} -m pip install -r requirements/ingest/gitlab.txt - -.PHONY: install-ingest-onedrive -install-ingest-onedrive: - ${PYTHON} -m pip install -r requirements/ingest/onedrive.txt - -.PHONY: install-ingest-outlook -install-ingest-outlook: - ${PYTHON} -m pip install -r requirements/ingest/outlook.txt - -.PHONY: install-ingest-reddit -install-ingest-reddit: - ${PYTHON} -m pip install -r requirements/ingest/reddit.txt - -.PHONY: install-ingest-slack -install-ingest-slack: - ${PYTHON} -m pip install -r requirements/ingest/slack.txt - -.PHONY: install-ingest-kafka -install-ingest-kafka: - ${PYTHON} -m pip install -r requirements/ingest/kafka.txt - -.PHONY: install-ingest-wikipedia -install-ingest-wikipedia: - ${PYTHON} -m pip install -r requirements/ingest/wikipedia.txt - -.PHONY: install-ingest-elasticsearch -install-ingest-elasticsearch: - ${PYTHON} -m pip install -r requirements/ingest/elasticsearch.txt - -.PHONY: install-ingest-opensearch -install-ingest-opensearch: - ${PYTHON} -m pip install -r requirements/ingest/opensearch.txt - -.PHONY: install-ingest-confluence -install-ingest-confluence: - ${PYTHON} -m pip install -r requirements/ingest/confluence.txt - -.PHONY: install-ingest-airtable -install-ingest-airtable: - ${PYTHON} -m pip install -r requirements/ingest/airtable.txt - -.PHONY: install-ingest-sharepoint -install-ingest-sharepoint: - ${PYTHON} -m pip install -r requirements/ingest/sharepoint.txt - -.PHONY: install-ingest-singlestore -install-ingest-singlestore: - ${PYTHON} -m pip install -r requirements/ingest/singlestore.txt - -.PHONY: install-ingest-weaviate -install-ingest-weaviate: - ${PYTHON} -m pip install -r 
requirements/ingest/weaviate.txt - -.PHONY: install-ingest-local -install-ingest-local: - echo "no unique dependencies for local connector" - -.PHONY: install-ingest-notion -install-ingest-notion: - ${PYTHON} -m pip install -r requirements/ingest/notion.txt - -.PHONY: install-ingest-salesforce -install-ingest-salesforce: - ${PYTHON} -m pip install -r requirements/ingest/salesforce.txt - -.PHONY: install-ingest-jira -install-ingest-jira: - ${PYTHON} -m pip install -r requirements/ingest/jira.txt - -.PHONY: install-ingest-hubspot -install-ingest-hubspot: - ${PYTHON} -m pip install -r requirements/ingest/hubspot.txt - -.PHONY: install-ingest-sftp -install-ingest-sftp: - ${PYTHON} -m pip install -r requirements/ingest/sftp.txt - -.PHONY: install-ingest-pinecone -install-ingest-pinecone: - ${PYTHON} -m pip install -r requirements/ingest/pinecone.txt - -.PHONY: install-ingest-qdrant -install-ingest-qdrant: - ${PYTHON} -m pip install -r requirements/ingest/qdrant.txt - -.PHONY: install-ingest-chroma -install-ingest-chroma: - ${PYTHON} -m pip install -r requirements/ingest/chroma.txt - -.PHONY: install-ingest-postgres -install-ingest-postgres: - ${PYTHON} -m pip install -r requirements/ingest/postgres.txt - -.PHONY: install-ingest-mongodb -install-ingest-mongodb: - ${PYTHON} -m pip install -r requirements/ingest/mongodb.txt - -.PHONY: install-ingest-databricks-volumes -install-ingest-databricks-volumes: - ${PYTHON} -m pip install -r requirements/ingest/databricks-volumes.txt - -.PHONY: install-ingest-astradb -install-ingest-astradb: - ${PYTHON} -m pip install -r requirements/ingest/astradb.txt - -.PHONY: install-ingest-clarifai -install-ingest-clarifai: - ${PYTHON} -m pip install -r requirements/ingest/clarifai.txt - -.PHONY: install-embed-huggingface -install-embed-huggingface: - ${PYTHON} -m pip install -r requirements/ingest/embed-huggingface.txt - -.PHONY: install-unstructured-inference -install-unstructured-inference: - ${PYTHON} -m pip install -r requirements/ingest/local-inference.txt +.PHONY: install-ingest +install-ingest: + python3 -m pip install -r requirements/ingest/ingest.txt ## install-local-inference: installs requirements for local inference .PHONY: install-local-inference @@ -367,7 +205,7 @@ test-extra-xlsx: ## check: runs linters (includes tests) .PHONY: check -check: check-ruff check-black check-flake8 check-version check-flake8-print +check: check-ruff check-black check-flake8 check-version .PHONY: check-shfmt check-shfmt: @@ -385,12 +223,6 @@ check-flake8: check-licenses: @scripts/check-licenses.sh -# Check for print statements in ingest since anything going to console should be using the ingest logger -# as it has a built in filter to redact sensitive information -.PHONY: check-flake8-print -check-flake8-print: - flake8 --per-file-ignores "" ./unstructured/ingest - .PHONY: check-ruff check-ruff: # -- ruff options are determined by pyproject.toml -- diff --git a/docs/requirements.in b/docs/requirements.in index 27a82d80c..46b71caac 100644 --- a/docs/requirements.in +++ b/docs/requirements.in @@ -22,4 +22,4 @@ furo==2023.7.26 certifi>=2022.12.07 # NOTE(ronny) - Added to suppress Sphinx warnings -myst-parser \ No newline at end of file +myst-parser diff --git a/docs/requirements.txt b/docs/requirements.txt index ee5fdd1d2..e20c1267e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,17 +10,17 @@ babel==2.13.1 # via sphinx beautifulsoup4==4.12.2 # via - # -c base.txt + # -c ./deps/base.txt # furo certifi==2023.11.17 # via - # -c base.txt + # -c ./deps/base.txt 
# -c constraints.in # -r build.in # requests charset-normalizer==3.3.2 # via - # -c base.txt + # -c ./deps/base.txt # requests docutils==0.18.1 # via @@ -32,7 +32,7 @@ furo==2023.7.26 # via -r build.in idna==3.6 # via - # -c base.txt + # -c ./deps/base.txt # requests imagesize==1.4.1 # via sphinx @@ -56,7 +56,7 @@ myst-parser==2.0.0 # via -r build.in packaging==23.2 # via - # -c base.txt + # -c ./deps/base.txt # sphinx pygments==2.17.2 # via @@ -69,13 +69,13 @@ pyyaml==6.0.1 # via myst-parser requests==2.31.0 # via - # -c base.txt + # -c ./deps/base.txt # sphinx snowballstemmer==2.2.0 # via sphinx soupsieve==2.5 # via - # -c base.txt + # -c ./deps/base.txt # beautifulsoup4 sphinx==6.2.1 # via @@ -118,7 +118,7 @@ sphinxcontrib-serializinghtml==1.1.5 # sphinx urllib3==1.26.18 # via - # -c base.txt + # -c ./deps/base.txt # -c constraints.in # requests zipp==3.17.0 diff --git a/requirements/Makefile b/requirements/Makefile index 9c4175401..9e6b685fc 100644 --- a/requirements/Makefile +++ b/requirements/Makefile @@ -3,12 +3,8 @@ SHELL := /bin/bash BASE_REQUIREMENTS := $(shell ls ./*.in) BASE_REQUIREMENTSTXT := $(patsubst %.in,%.txt,$(BASE_REQUIREMENTS)) -INGEST_REQUIREMENTS := $(shell ls ./ingest/*.in) -INGEST_REQUIREMENTSTXT := $(patsubst %.in,%.txt,$(INGEST_REQUIREMENTS)) - - .PHONY: all -all: compile-all-base compile-ingest +all: compile-all-base .PHONY: compile-test compile-test: @@ -26,18 +22,9 @@ compile-base: compile-all-base: compile-base compile-test compile-dev @$(foreach file,$(BASE_REQUIREMENTS),echo -e "\n\ncompiling: $(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;) -.PHONY: compile-ingest -compile-ingest: - @$(foreach file,$(INGEST_REQUIREMENTS),echo -e "\n\ncompiling: $(file)" && pip-compile --no-strip-extras --upgrade $(file) || exit;) - .PHONY: clean -clean: clean-base clean-ingest +clean: clean-base .PHONY: clean-base clean-base: - rm $(BASE_REQUIREMENTSTXT) - -.PHONY: clean-ingest -clean-ingest: - rm $(INGEST_REQUIREMENTSTXT) - + rm $(BASE_REQUIREMENTSTXT) \ No newline at end of file diff --git a/requirements/base.txt b/requirements/base.txt index b0f454c0f..5ff129c06 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,7 +4,7 @@ # # pip-compile ./base.in # -anyio==4.5.0 +anyio==4.6.0 # via httpx backoff==2.2.1 # via -r ./base.in @@ -36,7 +36,7 @@ dataclasses-json==0.6.7 # unstructured-client deepdiff==8.0.1 # via unstructured-client -emoji==2.13.0 +emoji==2.14.0 # via -r ./base.in exceptiongroup==1.2.2 # via anyio @@ -44,7 +44,7 @@ filetype==1.2.0 # via -r ./base.in h11==0.14.0 # via httpcore -httpcore==1.0.5 +httpcore==1.0.6 # via httpx httpx==0.27.2 # via unstructured-client @@ -88,7 +88,7 @@ psutil==6.0.0 # via -r ./base.in pycparser==2.22 # via cffi -pypdf==5.0.0 +pypdf==5.0.1 # via unstructured-client python-dateutil==2.9.0.post0 # via unstructured-client @@ -98,7 +98,7 @@ python-magic==0.4.27 # via -r ./base.in python-oxmsg==0.0.1 # via -r ./base.in -rapidfuzz==3.9.7 +rapidfuzz==3.10.0 # via -r ./base.in regex==2024.9.11 # via nltk @@ -130,7 +130,6 @@ typing-extensions==4.12.2 # via # -r ./base.in # anyio - # emoji # pypdf # python-oxmsg # typing-inspect @@ -140,7 +139,9 @@ typing-inspect==0.9.0 # dataclasses-json # unstructured-client unstructured-client==0.25.9 - # via -r ./base.in + # via + # -c ././deps/constraints.txt + # -r ./base.in urllib3==1.26.20 # via # -c ././deps/constraints.txt diff --git a/requirements/cache.txt b/requirements/cache.txt deleted file mode 100644 index d229daaec..000000000 --- a/requirements/cache.txt 
+++ /dev/null @@ -1 +0,0 @@ -# a \ No newline at end of file diff --git a/requirements/dev.txt b/requirements/dev.txt index 4df21d1ab..3ce9e87d6 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -38,7 +38,7 @@ platformdirs==4.3.6 # virtualenv pre-commit==3.8.0 # via -r ./dev.in -pyproject-hooks==1.1.0 +pyproject-hooks==1.2.0 # via # build # pip-tools @@ -46,12 +46,12 @@ pyyaml==6.0.2 # via # -c ./test.txt # pre-commit -tomli==2.0.1 +tomli==2.0.2 # via # -c ./test.txt # build # pip-tools -virtualenv==20.26.5 +virtualenv==20.26.6 # via pre-commit wheel==0.44.0 # via pip-tools diff --git a/requirements/extra-csv.txt b/requirements/extra-csv.txt index f606a04a4..1896204fb 100644 --- a/requirements/extra-csv.txt +++ b/requirements/extra-csv.txt @@ -8,7 +8,7 @@ numpy==1.26.4 # via # -c ./base.txt # pandas -pandas==2.2.2 +pandas==2.2.3 # via -r ./extra-csv.in python-dateutil==2.9.0.post0 # via @@ -20,5 +20,5 @@ six==1.16.0 # via # -c ./base.txt # python-dateutil -tzdata==2024.1 +tzdata==2024.2 # via pandas diff --git a/requirements/extra-paddleocr.txt b/requirements/extra-paddleocr.txt index 048822000..db0079f9f 100644 --- a/requirements/extra-paddleocr.txt +++ b/requirements/extra-paddleocr.txt @@ -4,7 +4,7 @@ # # pip-compile ./extra-paddleocr.in # -anyio==4.5.0 +anyio==4.6.0 # via # -c ./base.txt # httpx @@ -32,13 +32,13 @@ exceptiongroup==1.2.2 # via # -c ./base.txt # anyio -fonttools==4.53.1 +fonttools==4.54.1 # via matplotlib h11==0.14.0 # via # -c ./base.txt # httpcore -httpcore==1.0.5 +httpcore==1.0.6 # via # -c ./base.txt # httpx @@ -127,7 +127,7 @@ python-dateutil==2.9.0.post0 # matplotlib pyyaml==6.0.2 # via unstructured-paddleocr -rapidfuzz==3.9.7 +rapidfuzz==3.10.0 # via # -c ./base.txt # unstructured-paddleocr diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index 29ace44ca..a7d3ce8cf 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -42,7 +42,7 @@ filelock==3.16.1 # transformers flatbuffers==24.3.25 # via onnxruntime -fonttools==4.53.1 +fonttools==4.54.1 # via matplotlib fsspec==2024.9.0 # via @@ -60,14 +60,14 @@ googleapis-common-protos==1.65.0 # via # google-api-core # grpcio-status -grpcio==1.66.1 +grpcio==1.66.2 # via # -c ././deps/constraints.txt # google-api-core # grpcio-status grpcio-status==1.62.3 # via google-api-core -huggingface-hub==0.25.0 +huggingface-hub==0.25.1 # via # timm # tokenizers @@ -119,7 +119,7 @@ numpy==1.26.4 # transformers omegaconf==2.3.0 # via effdet -onnx==1.16.2 +onnx==1.17.0 # via # -r ./extra-pdf-image.in # unstructured-inference @@ -138,7 +138,7 @@ packaging==24.1 # pikepdf # transformers # unstructured-pytesseract -pandas==2.2.2 +pandas==2.2.3 # via layoutparser pdf2image==1.17.0 # via @@ -152,7 +152,7 @@ pdfplumber==0.11.4 # via layoutparser pi-heif==0.18.0 # via -r ./extra-pdf-image.in -pikepdf==9.2.1 +pikepdf==9.3.0 # via -r ./extra-pdf-image.in pillow==10.4.0 # via @@ -194,7 +194,7 @@ pycparser==2.22 # cffi pyparsing==3.1.4 # via matplotlib -pypdf==5.0.0 +pypdf==5.0.1 # via # -c ./base.txt # -r ./extra-pdf-image.in @@ -205,7 +205,7 @@ python-dateutil==2.9.0.post0 # -c ./base.txt # matplotlib # pandas -python-multipart==0.0.9 +python-multipart==0.0.12 # via unstructured-inference pytz==2024.2 # via pandas @@ -216,7 +216,7 @@ pyyaml==6.0.2 # omegaconf # timm # transformers -rapidfuzz==3.9.7 +rapidfuzz==3.10.0 # via # -c ./base.txt # unstructured-inference @@ -279,7 +279,7 @@ typing-extensions==4.12.2 # iopath # pypdf # torch -tzdata==2024.1 +tzdata==2024.2 # via 
pandas unstructured-inference==0.7.36 # via -r ./extra-pdf-image.in diff --git a/requirements/extra-xlsx.txt b/requirements/extra-xlsx.txt index bd931000a..ff08577a7 100644 --- a/requirements/extra-xlsx.txt +++ b/requirements/extra-xlsx.txt @@ -14,7 +14,7 @@ numpy==1.26.4 # pandas openpyxl==3.1.5 # via -r ./extra-xlsx.in -pandas==2.2.2 +pandas==2.2.3 # via -r ./extra-xlsx.in python-dateutil==2.9.0.post0 # via @@ -26,7 +26,7 @@ six==1.16.0 # via # -c ./base.txt # python-dateutil -tzdata==2024.1 +tzdata==2024.2 # via pandas xlrd==2.0.1 # via -r ./extra-xlsx.in diff --git a/requirements/huggingface.txt b/requirements/huggingface.txt index daa466d3d..7b2e04bde 100644 --- a/requirements/huggingface.txt +++ b/requirements/huggingface.txt @@ -25,7 +25,7 @@ fsspec==2024.9.0 # via # huggingface-hub # torch -huggingface-hub==0.25.0 +huggingface-hub==0.25.1 # via # tokenizers # transformers diff --git a/requirements/ingest/airtable.in b/requirements/ingest/airtable.in deleted file mode 100644 index e6e85c3c6..000000000 --- a/requirements/ingest/airtable.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -pyairtable diff --git a/requirements/ingest/airtable.txt b/requirements/ingest/airtable.txt deleted file mode 100644 index e45acd598..000000000 --- a/requirements/ingest/airtable.txt +++ /dev/null @@ -1,44 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/airtable.in -# -annotated-types==0.7.0 - # via pydantic -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -inflection==0.5.1 - # via pyairtable -pyairtable==2.3.3 - # via -r ./ingest/airtable.in -pydantic==2.9.2 - # via pyairtable -pydantic-core==2.23.4 - # via pydantic -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # pyairtable -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # pyairtable - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # pyairtable - # requests diff --git a/requirements/ingest/astradb.in b/requirements/ingest/astradb.in deleted file mode 100644 index 0c99a4c93..000000000 --- a/requirements/ingest/astradb.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -astrapy diff --git a/requirements/ingest/astradb.txt b/requirements/ingest/astradb.txt deleted file mode 100644 index 46553972a..000000000 --- a/requirements/ingest/astradb.txt +++ /dev/null @@ -1,100 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/astradb.in -# -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -astrapy==1.4.2 - # via -r ./ingest/astradb.in -cassandra-driver==3.29.2 - # via cassio -cassio==0.1.9 - # via astrapy -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -click==8.1.7 - # via - # -c ./ingest/../base.txt - # geomet -deprecation==2.1.0 - # via astrapy -dnspython==2.6.1 - # via pymongo -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -geomet==0.2.1.post1 - # via cassandra-driver -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -h2==4.1.0 - # via httpx -hpack==4.0.0 - # via h2 -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx 
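All of the compiled per-connector requirement files deleted in this patch (airtable.txt, astradb.txt, and the rest below) are superseded by the single `ingest.txt` file that the new `install-ingest` Makefile target consumes. A minimal sketch of the consolidated flow under that reading; the exact contents of `requirements/ingest/ingest.txt` are not shown in this patch, and the `s3` extra name is an assumption, not something the diff confirms:

```sh
# New single entry point for ingest dependencies (per the Makefile change above):
make install-ingest
# ...which now reduces to one requirements file pinning unstructured-ingest:
python3 -m pip install -r requirements/ingest/ingest.txt
# Connector-specific dependencies presumably move to extras on the new package
# ("s3" is an assumed extra name, not confirmed by this patch):
python3 -m pip install "unstructured-ingest[s3]"
```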
-httpx[http2]==0.27.2 - # via - # -c ./ingest/../base.txt - # astrapy -hyperframe==6.0.1 - # via h2 -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # cassio -packaging==24.1 - # via - # -c ./ingest/../base.txt - # deprecation -pymongo==4.9.1 - # via astrapy -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # cassio -six==1.16.0 - # via - # -c ./ingest/../base.txt - # geomet -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -toml==0.10.2 - # via astrapy -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -uuid6==2024.7.10 - # via astrapy diff --git a/requirements/ingest/azure-cognitive-search.in b/requirements/ingest/azure-cognitive-search.in deleted file mode 100644 index 226649fb3..000000000 --- a/requirements/ingest/azure-cognitive-search.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -azure-search-documents diff --git a/requirements/ingest/azure-cognitive-search.txt b/requirements/ingest/azure-cognitive-search.txt deleted file mode 100644 index ef220fca5..000000000 --- a/requirements/ingest/azure-cognitive-search.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/azure-cognitive-search.in -# -azure-common==1.1.28 - # via azure-search-documents -azure-core==1.31.0 - # via azure-search-documents -azure-search-documents==11.5.1 - # via -r ./ingest/azure-cognitive-search.in -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -isodate==0.6.1 - # via azure-search-documents -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # azure-core -six==1.16.0 - # via - # -c ./ingest/../base.txt - # azure-core - # isodate -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # azure-core - # azure-search-documents -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/azure.in b/requirements/ingest/azure.in deleted file mode 100644 index e90750100..000000000 --- a/requirements/ingest/azure.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -adlfs -fsspec diff --git a/requirements/ingest/azure.txt b/requirements/ingest/azure.txt deleted file mode 100644 index b02308977..000000000 --- a/requirements/ingest/azure.txt +++ /dev/null @@ -1,108 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/azure.in -# -adlfs==2024.7.0 - # via -r ./ingest/azure.in -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via adlfs -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -azure-core==1.31.0 - # via - # adlfs - # azure-identity - # azure-storage-blob -azure-datalake-store==0.0.53 - # via adlfs -azure-identity==1.18.0 - # via adlfs -azure-storage-blob==12.23.0 - # via adlfs -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # azure-datalake-store - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # 
-c ./ingest/../base.txt - # azure-identity - # azure-storage-blob - # msal - # pyjwt -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.9.0 - # via - # -r ./ingest/azure.in - # adlfs -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests - # yarl -isodate==0.6.1 - # via azure-storage-blob -msal==1.31.0 - # via - # azure-datalake-store - # azure-identity - # msal-extensions -msal-extensions==1.2.0 - # via azure-identity -multidict==6.1.0 - # via - # aiohttp - # yarl -portalocker==2.10.1 - # via msal-extensions -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # azure-core - # azure-datalake-store - # msal -six==1.16.0 - # via - # -c ./ingest/../base.txt - # azure-core - # isodate -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # azure-core - # azure-identity - # azure-storage-blob - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/biomed.in b/requirements/ingest/biomed.in deleted file mode 100644 index 7a231f44f..000000000 --- a/requirements/ingest/biomed.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -bs4 diff --git a/requirements/ingest/biomed.txt b/requirements/ingest/biomed.txt deleted file mode 100644 index 770ec68a4..000000000 --- a/requirements/ingest/biomed.txt +++ /dev/null @@ -1,16 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/biomed.in -# -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # bs4 -bs4==0.0.2 - # via -r ./ingest/biomed.in -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 diff --git a/requirements/ingest/box.in b/requirements/ingest/box.in deleted file mode 100644 index 3b123f814..000000000 --- a/requirements/ingest/box.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -boxfs -fsspec diff --git a/requirements/ingest/box.txt b/requirements/ingest/box.txt deleted file mode 100644 index 297f02410..000000000 --- a/requirements/ingest/box.txt +++ /dev/null @@ -1,65 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/box.in -# -attrs==24.2.0 - # via boxsdk -boxfs==0.3.0 - # via -r ./ingest/box.in -boxsdk[jwt]==3.13.0 - # via boxfs -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # boxsdk -fsspec==2024.9.0 - # via - # -r ./ingest/box.in - # boxfs -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt==2.9.0 - # via boxsdk -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # boxsdk -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # boxsdk - # requests-toolbelt -requests-toolbelt==1.0.0 - # via - # -c ./ingest/../base.txt - # boxsdk -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # boxsdk - # requests diff --git a/requirements/ingest/chroma.in b/requirements/ingest/chroma.in deleted file mode 100644 index b94a6b462..000000000 --- 
a/requirements/ingest/chroma.in +++ /dev/null @@ -1,10 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -chromadb>0.4.14 -importlib-metadata>=8.2.0 -# Future releases adds in typer-cli which breaks the resolution of typer as a library -typer<=0.9.0 -# tenacity 9.0.0 is being installed via chroma, but other dependencies (langchain) restrict tenacity -# to <9.0.0 and resolve to 8.5.0. -# The original langchain pin: https://github.com/langchain-ai/langchain/pull/849/ -tenacity==8.5.0 diff --git a/requirements/ingest/chroma.txt b/requirements/ingest/chroma.txt deleted file mode 100644 index e0bd8c909..000000000 --- a/requirements/ingest/chroma.txt +++ /dev/null @@ -1,256 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/chroma.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx - # starlette - # watchfiles -backoff==2.2.1 - # via - # -c ./ingest/../base.txt - # opentelemetry-exporter-otlp-proto-grpc - # posthog -bcrypt==4.2.0 - # via chromadb -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpx - # kubernetes - # pulsar-client - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -chroma-hnswlib==0.7.3 - # via chromadb -chromadb==0.4.17 - # via -r ./ingest/chroma.in -click==8.1.7 - # via - # -c ./ingest/../base.txt - # typer - # uvicorn -coloredlogs==15.0.1 - # via onnxruntime -deprecated==1.2.14 - # via opentelemetry-api -durationpy==0.7 - # via kubernetes -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -fastapi==0.115.0 - # via chromadb -filelock==3.16.1 - # via huggingface-hub -flatbuffers==24.3.25 - # via onnxruntime -fsspec==2024.9.0 - # via huggingface-hub -google-auth==2.35.0 - # via kubernetes -googleapis-common-protos==1.65.0 - # via opentelemetry-exporter-otlp-proto-grpc -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # chromadb - # opentelemetry-exporter-otlp-proto-grpc -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore - # uvicorn -httptools==0.6.1 - # via uvicorn -huggingface-hub==0.25.0 - # via tokenizers -humanfriendly==10.0 - # via coloredlogs -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -importlib-metadata==8.5.0 - # via - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/chroma.in -importlib-resources==6.4.5 - # via chromadb -kubernetes==31.0.0 - # via chromadb -monotonic==1.6 - # via posthog -mpmath==1.3.0 - # via sympy -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # chroma-hnswlib - # chromadb - # onnxruntime -oauthlib==3.2.2 - # via - # kubernetes - # requests-oauthlib -onnxruntime==1.19.2 - # via chromadb -opentelemetry-api==1.16.0 - # via - # chromadb - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-sdk -opentelemetry-exporter-otlp-proto-grpc==1.16.0 - # via chromadb -opentelemetry-proto==1.16.0 - # via opentelemetry-exporter-otlp-proto-grpc -opentelemetry-sdk==1.16.0 - # via - # chromadb - # opentelemetry-exporter-otlp-proto-grpc -opentelemetry-semantic-conventions==0.37b0 - # via opentelemetry-sdk -overrides==7.7.0 - # via chromadb -packaging==24.1 - # via - # -c ./ingest/../base.txt - # build - # huggingface-hub - # onnxruntime -posthog==3.6.6 - # via chromadb -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # googleapis-common-protos - # onnxruntime - # opentelemetry-proto -pulsar-client==3.5.0 - # via chromadb 
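The pins documented in chroma.in just above (holding typer at <=0.9.0 and tenacity at 8.5.0) existed to keep this repo's compiled connector sets mutually resolvable; with this patch that burden presumably shifts to the unstructured-ingest release process. A quick local sanity check after running the new `make install-ingest`, using only pip's built-in resolver audit (no assumptions beyond a completed install):

```sh
# Report any broken or conflicting requirements in the active environment:
python3 -m pip check
```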
-pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==2.9.2 - # via - # chromadb - # fastapi -pydantic-core==2.23.4 - # via pydantic -pypika==0.48.9 - # via chromadb -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # kubernetes - # posthog -python-dotenv==1.0.1 - # via uvicorn -pyyaml==6.0.2 - # via - # chromadb - # huggingface-hub - # kubernetes - # uvicorn -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # chromadb - # huggingface-hub - # kubernetes - # posthog - # requests-oauthlib -requests-oauthlib==2.0.0 - # via kubernetes -rsa==4.9 - # via google-auth -six==1.16.0 - # via - # -c ./ingest/../base.txt - # kubernetes - # posthog - # python-dateutil -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -starlette==0.38.5 - # via fastapi -sympy==1.13.3 - # via onnxruntime -tenacity==8.5.0 - # via - # -r ./ingest/chroma.in - # chromadb -tokenizers==0.19.1 - # via - # -c ./ingest/../deps/constraints.txt - # chromadb -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # chromadb - # huggingface-hub -typer==0.9.0 - # via - # -r ./ingest/chroma.in - # chromadb -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # chromadb - # fastapi - # huggingface-hub - # opentelemetry-sdk - # pydantic - # pydantic-core - # starlette - # typer - # uvicorn -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # kubernetes - # requests -uvicorn[standard]==0.30.6 - # via chromadb -uvloop==0.20.0 - # via uvicorn -watchfiles==0.24.0 - # via uvicorn -websocket-client==1.8.0 - # via kubernetes -websockets==13.0.1 - # via uvicorn -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated - # opentelemetry-instrumentation -zipp==3.20.2 - # via - # importlib-metadata - # importlib-resources - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/ingest/clarifai.in b/requirements/ingest/clarifai.in deleted file mode 100644 index becc852ac..000000000 --- a/requirements/ingest/clarifai.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -clarifai diff --git a/requirements/ingest/clarifai.txt b/requirements/ingest/clarifai.txt deleted file mode 100644 index 15c99bf72..000000000 --- a/requirements/ingest/clarifai.txt +++ /dev/null @@ -1,83 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/clarifai.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -clarifai==10.7.0 - # via -r ./ingest/clarifai.in -clarifai-grpc==10.7.1 - # via clarifai -contextlib2==21.6.0 - # via schema -googleapis-common-protos==1.65.0 - # via clarifai-grpc -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # clarifai-grpc -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -inquirerpy==0.3.4 - # via clarifai -markdown-it-py==3.0.0 - # via rich -mdurl==0.1.2 - # via markdown-it-py -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # clarifai - # tritonclient -pfzy==0.3.4 - # via inquirerpy -pillow==10.4.0 - # via clarifai -prompt-toolkit==3.0.47 - # via inquirerpy -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # clarifai-grpc - # googleapis-common-protos -pygments==2.18.0 - # via rich -python-rapidjson==1.20 - # via tritonclient -pyyaml==6.0.2 - # via clarifai -requests==2.32.3 - # 
via - # -c ./ingest/../base.txt - # clarifai-grpc -rich==13.8.1 - # via clarifai -schema==0.7.5 - # via clarifai -tabulate==0.9.0 - # via - # -c ./ingest/../base.txt - # clarifai -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # clarifai -tritonclient==2.41.1 - # via clarifai -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wcwidth==0.2.13 - # via prompt-toolkit diff --git a/requirements/ingest/confluence.in b/requirements/ingest/confluence.in deleted file mode 100644 index 37f92cb8c..000000000 --- a/requirements/ingest/confluence.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -atlassian-python-api diff --git a/requirements/ingest/confluence.txt b/requirements/ingest/confluence.txt deleted file mode 100644 index a54fa71d8..000000000 --- a/requirements/ingest/confluence.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/confluence.in -# -atlassian-python-api==3.41.16 - # via -r ./ingest/confluence.in -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -deprecated==1.2.14 - # via atlassian-python-api -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -jmespath==1.0.1 - # via atlassian-python-api -oauthlib==3.2.2 - # via - # atlassian-python-api - # requests-oauthlib -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api - # requests-oauthlib -requests-oauthlib==2.0.0 - # via atlassian-python-api -six==1.16.0 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated diff --git a/requirements/ingest/databricks-volumes.in b/requirements/ingest/databricks-volumes.in deleted file mode 100644 index 8bad8aec3..000000000 --- a/requirements/ingest/databricks-volumes.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -databricks-sdk diff --git a/requirements/ingest/databricks-volumes.txt b/requirements/ingest/databricks-volumes.txt deleted file mode 100644 index ac6f34cc5..000000000 --- a/requirements/ingest/databricks-volumes.txt +++ /dev/null @@ -1,41 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/databricks-volumes.in -# -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -databricks-sdk==0.32.3 - # via -r ./ingest/databricks-volumes.in -google-auth==2.35.0 - # via databricks-sdk -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # databricks-sdk -rsa==4.9 - # via google-auth -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/delta-table.in b/requirements/ingest/delta-table.in deleted file mode 100644 index 47d4079bd..000000000 --- a/requirements/ingest/delta-table.in +++ 
/dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -deltalake<=0.19.1 -fsspec diff --git a/requirements/ingest/delta-table.txt b/requirements/ingest/delta-table.txt deleted file mode 100644 index 68f8dfae7..000000000 --- a/requirements/ingest/delta-table.txt +++ /dev/null @@ -1,16 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/delta-table.in -# -deltalake==0.19.1 - # via -r ./ingest/delta-table.in -fsspec==2024.9.0 - # via -r ./ingest/delta-table.in -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # pyarrow -pyarrow==17.0.0 - # via deltalake diff --git a/requirements/ingest/discord.in b/requirements/ingest/discord.in deleted file mode 100644 index 83bbeed43..000000000 --- a/requirements/ingest/discord.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -discord-py diff --git a/requirements/ingest/discord.txt b/requirements/ingest/discord.txt deleted file mode 100644 index 6368195ea..000000000 --- a/requirements/ingest/discord.txt +++ /dev/null @@ -1,36 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/discord.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via discord-py -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -discord-py==2.4.0 - # via -r ./ingest/discord.in -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -idna==3.10 - # via - # -c ./ingest/../base.txt - # yarl -multidict==6.1.0 - # via - # aiohttp - # yarl -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # multidict -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/dropbox.in b/requirements/ingest/dropbox.in deleted file mode 100644 index b9b0fe1d3..000000000 --- a/requirements/ingest/dropbox.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -dropboxdrivefs -fsspec diff --git a/requirements/ingest/dropbox.txt b/requirements/ingest/dropbox.txt deleted file mode 100644 index bbba1ba1b..000000000 --- a/requirements/ingest/dropbox.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/dropbox.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -dropbox==12.0.2 - # via dropboxdrivefs -dropboxdrivefs==1.4.1 - # via -r ./ingest/dropbox.in -fsspec==2024.9.0 - # via - # -r ./ingest/dropbox.in - # dropboxdrivefs -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -ply==3.11 - # via stone -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # dropbox - # dropboxdrivefs -six==1.16.0 - # via - # -c ./ingest/../base.txt - # dropbox - # stone -stone==3.3.1 - # via dropbox -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/elasticsearch.in b/requirements/ingest/elasticsearch.in deleted file mode 100644 index 5b6d0db36..000000000 --- a/requirements/ingest/elasticsearch.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -elasticsearch[async] diff --git a/requirements/ingest/elasticsearch.txt b/requirements/ingest/elasticsearch.txt deleted file mode 100644 index b23d77117..000000000 --- a/requirements/ingest/elasticsearch.txt +++ /dev/null @@ -1,47 +0,0 @@ -# -# This file is autogenerated by 
pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/elasticsearch.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via elasticsearch -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # elastic-transport -elastic-transport==8.15.0 - # via elasticsearch -elasticsearch[async]==8.15.1 - # via -r ./ingest/elasticsearch.in -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -idna==3.10 - # via - # -c ./ingest/../base.txt - # yarl -multidict==6.1.0 - # via - # aiohttp - # yarl -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # elastic-transport -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/embed-aws-bedrock.in b/requirements/ingest/embed-aws-bedrock.in deleted file mode 100644 index dd73b768d..000000000 --- a/requirements/ingest/embed-aws-bedrock.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -boto3 -langchain-community diff --git a/requirements/ingest/embed-aws-bedrock.txt b/requirements/ingest/embed-aws-bedrock.txt deleted file mode 100644 index 69d0e147e..000000000 --- a/requirements/ingest/embed-aws-bedrock.txt +++ /dev/null @@ -1,191 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-aws-bedrock.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # langchain - # langchain-community -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==24.2.0 - # via aiohttp -boto3==1.34.131 - # via -r ./ingest/embed-aws-bedrock.in -botocore==1.34.131 - # via - # -c ./ingest/../deps/constraints.txt - # boto3 - # s3transfer -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -dataclasses-json==0.6.7 - # via - # -c ./ingest/../base.txt - # langchain-community -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests - # yarl -jmespath==1.0.1 - # via - # boto3 - # botocore -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain==0.3.0 - # via langchain-community -langchain-community==0.3.0 - # via -r ./ingest/embed-aws-bedrock.in -langchain-core==0.3.2 - # via - # langchain - # langchain-community - # langchain-text-splitters -langchain-text-splitters==0.3.0 - # via langchain -langsmith==0.1.125 - # via - # langchain - # langchain-community - # langchain-core -marshmallow==3.22.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -multidict==6.1.0 - # via - # aiohttp - # yarl -mypy-extensions==1.0.0 - # via - # -c ./ingest/../base.txt - # typing-inspect -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # langchain - # langchain-community -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # langchain-core - # 
marshmallow -pydantic==2.9.2 - # via - # langchain - # langchain-core - # langsmith - # pydantic-settings -pydantic-core==2.23.4 - # via pydantic -pydantic-settings==2.5.2 - # via langchain-community -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # botocore -python-dotenv==1.0.1 - # via pydantic-settings -pyyaml==6.0.2 - # via - # langchain - # langchain-community - # langchain-core -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # langchain - # langchain-community - # langsmith -s3transfer==0.10.2 - # via boto3 -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sqlalchemy==2.0.35 - # via - # langchain - # langchain-community -tenacity==8.5.0 - # via - # langchain - # langchain-community - # langchain-core -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # langchain-core - # multidict - # pydantic - # pydantic-core - # sqlalchemy - # typing-inspect -typing-inspect==0.9.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # botocore - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/embed-huggingface.in b/requirements/ingest/embed-huggingface.in deleted file mode 100644 index 88b7218f8..000000000 --- a/requirements/ingest/embed-huggingface.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt - -langchain-huggingface diff --git a/requirements/ingest/embed-huggingface.txt b/requirements/ingest/embed-huggingface.txt deleted file mode 100644 index 24756b413..000000000 --- a/requirements/ingest/embed-huggingface.txt +++ /dev/null @@ -1,170 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-huggingface.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -filelock==3.16.1 - # via - # huggingface-hub - # torch - # transformers -fsspec==2024.9.0 - # via - # huggingface-hub - # torch -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith -huggingface-hub==0.25.0 - # via - # langchain-huggingface - # sentence-transformers - # tokenizers - # transformers -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -jinja2==3.1.4 - # via torch -joblib==1.4.2 - # via - # -c ./ingest/../base.txt - # scikit-learn -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain-core==0.3.2 - # via langchain-huggingface -langchain-huggingface==0.1.0 - # via -r ./ingest/embed-huggingface.in -langsmith==0.1.125 - # via langchain-core -markupsafe==2.1.5 - # via jinja2 -mpmath==1.3.0 - # via sympy -networkx==3.2.1 - # via torch -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # scikit-learn - # scipy - # transformers -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # huggingface-hub - # langchain-core - # transformers -pillow==10.4.0 - # via sentence-transformers -pydantic==2.9.2 - # via - # langchain-core - # langsmith 
-pydantic-core==2.23.4 - # via pydantic -pyyaml==6.0.2 - # via - # huggingface-hub - # langchain-core - # transformers -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # transformers -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # huggingface-hub - # langsmith - # transformers -safetensors==0.4.5 - # via transformers -scikit-learn==1.5.2 - # via sentence-transformers -scipy==1.13.1 - # via - # scikit-learn - # sentence-transformers -sentence-transformers==3.1.1 - # via langchain-huggingface -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sympy==1.13.3 - # via torch -tenacity==8.5.0 - # via langchain-core -threadpoolctl==3.5.0 - # via scikit-learn -tokenizers==0.19.1 - # via - # -c ./ingest/../deps/constraints.txt - # langchain-huggingface - # transformers -torch==2.4.1 - # via sentence-transformers -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # huggingface-hub - # sentence-transformers - # transformers -transformers==4.44.2 - # via - # langchain-huggingface - # sentence-transformers -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # huggingface-hub - # langchain-core - # pydantic - # pydantic-core - # torch -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/embed-mixedbreadai.in b/requirements/ingest/embed-mixedbreadai.in deleted file mode 100644 index 929e3f0ae..000000000 --- a/requirements/ingest/embed-mixedbreadai.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -mixedbread-ai \ No newline at end of file diff --git a/requirements/ingest/embed-mixedbreadai.txt b/requirements/ingest/embed-mixedbreadai.txt deleted file mode 100644 index da63dcbe9..000000000 --- a/requirements/ingest/embed-mixedbreadai.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-mixedbreadai.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # mixedbread-ai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -mixedbread-ai==2.2.6 - # via -r ./ingest/embed-mixedbreadai.in -pydantic==2.9.2 - # via mixedbread-ai -pydantic-core==2.23.4 - # via pydantic -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # mixedbread-ai - # pydantic - # pydantic-core diff --git a/requirements/ingest/embed-octoai.in b/requirements/ingest/embed-octoai.in deleted file mode 100644 index ede6c81e8..000000000 --- a/requirements/ingest/embed-octoai.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -openai -tiktoken diff --git a/requirements/ingest/embed-octoai.txt b/requirements/ingest/embed-octoai.txt deleted file mode 100644 index 87d04cc36..000000000 --- a/requirements/ingest/embed-octoai.txt +++ /dev/null @@ -1,87 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-octoai.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c 
./ingest/../base.txt - # httpx - # openai -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -distro==1.9.0 - # via openai -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # openai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -jiter==0.5.0 - # via openai -openai==1.46.1 - # via -r ./ingest/embed-octoai.in -pydantic==2.9.2 - # via openai -pydantic-core==2.23.4 - # via pydantic -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # tiktoken -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # tiktoken -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # openai -tiktoken==0.7.0 - # via -r ./ingest/embed-octoai.in -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # openai -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # openai - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/embed-openai.in b/requirements/ingest/embed-openai.in deleted file mode 100644 index fb130e9cb..000000000 --- a/requirements/ingest/embed-openai.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt - -langchain-openai diff --git a/requirements/ingest/embed-openai.txt b/requirements/ingest/embed-openai.txt deleted file mode 100644 index 7490efc76..000000000 --- a/requirements/ingest/embed-openai.txt +++ /dev/null @@ -1,113 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-openai.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx - # openai -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -distro==1.9.0 - # via openai -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith - # openai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests -jiter==0.5.0 - # via openai -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain-core==0.3.2 - # via langchain-openai -langchain-openai==0.2.0 - # via -r ./ingest/embed-openai.in -langsmith==0.1.125 - # via langchain-core -openai==1.46.1 - # via langchain-openai -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # langchain-core -pydantic==2.9.2 - # via - # langchain-core - # langsmith - # openai -pydantic-core==2.23.4 - # via pydantic -pyyaml==6.0.2 - # via langchain-core -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # tiktoken -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # langsmith - # tiktoken -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # openai -tenacity==8.5.0 - # via langchain-core -tiktoken==0.7.0 - # via langchain-openai -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # openai -typing-extensions==4.12.2 - # via - # -c 
./ingest/../base.txt - # anyio - # langchain-core - # openai - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/embed-vertexai.in b/requirements/ingest/embed-vertexai.in deleted file mode 100644 index ba68465a8..000000000 --- a/requirements/ingest/embed-vertexai.in +++ /dev/null @@ -1,5 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -langchain -langchain-community -langchain-google-vertexai diff --git a/requirements/ingest/embed-vertexai.txt b/requirements/ingest/embed-vertexai.txt deleted file mode 100644 index a912d25cb..000000000 --- a/requirements/ingest/embed-vertexai.txt +++ /dev/null @@ -1,275 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-vertexai.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # langchain - # langchain-community -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==24.2.0 - # via aiohttp -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -dataclasses-json==0.6.7 - # via - # -c ./ingest/../base.txt - # langchain-community -docstring-parser==0.16 - # via google-cloud-aiplatform -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -google-api-core[grpc]==2.20.0 - # via - # google-cloud-aiplatform - # google-cloud-bigquery - # google-cloud-core - # google-cloud-resource-manager - # google-cloud-storage -google-auth==2.35.0 - # via - # google-api-core - # google-cloud-aiplatform - # google-cloud-bigquery - # google-cloud-core - # google-cloud-resource-manager - # google-cloud-storage -google-cloud-aiplatform==1.67.1 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via - # google-cloud-bigquery - # google-cloud-storage -google-cloud-resource-manager==1.12.5 - # via google-cloud-aiplatform -google-cloud-storage==2.18.2 - # via - # google-cloud-aiplatform - # langchain-google-vertexai -google-crc32c==1.6.0 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 - # via - # google-cloud-bigquery - # google-cloud-storage -googleapis-common-protos[grpc]==1.65.0 - # via - # google-api-core - # grpc-google-iam-v1 - # grpcio-status -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # googleapis-common-protos - # grpc-google-iam-v1 - # grpcio-status -grpcio-status==1.62.3 - # via google-api-core -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langchain-google-vertexai - # langsmith -httpx-sse==0.4.0 - # via langchain-google-vertexai -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests - # yarl -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain==0.3.0 - # via - # -r ./ingest/embed-vertexai.in - # langchain-community -langchain-community==0.3.0 - # via -r 
./ingest/embed-vertexai.in -langchain-core==0.3.2 - # via - # langchain - # langchain-community - # langchain-google-vertexai - # langchain-text-splitters -langchain-google-vertexai==2.0.1 - # via -r ./ingest/embed-vertexai.in -langchain-text-splitters==0.3.0 - # via langchain -langsmith==0.1.125 - # via - # langchain - # langchain-community - # langchain-core -marshmallow==3.22.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -multidict==6.1.0 - # via - # aiohttp - # yarl -mypy-extensions==1.0.0 - # via - # -c ./ingest/../base.txt - # typing-inspect -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # langchain - # langchain-community - # shapely -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # google-cloud-aiplatform - # google-cloud-bigquery - # langchain-core - # marshmallow -proto-plus==1.24.0 - # via - # google-api-core - # google-cloud-aiplatform - # google-cloud-resource-manager -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # google-cloud-aiplatform - # google-cloud-resource-manager - # googleapis-common-protos - # grpc-google-iam-v1 - # grpcio-status - # proto-plus -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pydantic==2.9.2 - # via - # google-cloud-aiplatform - # langchain - # langchain-core - # langchain-google-vertexai - # langsmith - # pydantic-settings -pydantic-core==2.23.4 - # via pydantic -pydantic-settings==2.5.2 - # via langchain-community -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # google-cloud-bigquery -python-dotenv==1.0.1 - # via pydantic-settings -pyyaml==6.0.2 - # via - # langchain - # langchain-community - # langchain-core -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # google-api-core - # google-cloud-bigquery - # google-cloud-storage - # langchain - # langchain-community - # langsmith -rsa==4.9 - # via google-auth -shapely==2.0.6 - # via google-cloud-aiplatform -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sqlalchemy==2.0.35 - # via - # langchain - # langchain-community -tenacity==8.5.0 - # via - # langchain - # langchain-community - # langchain-core -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # langchain-core - # multidict - # pydantic - # pydantic-core - # sqlalchemy - # typing-inspect -typing-inspect==0.9.0 - # via - # -c ./ingest/../base.txt - # dataclasses-json -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/embed-voyageai.in b/requirements/ingest/embed-voyageai.in deleted file mode 100644 index efe01c7b0..000000000 --- a/requirements/ingest/embed-voyageai.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -langchain -langchain-voyageai diff --git a/requirements/ingest/embed-voyageai.txt b/requirements/ingest/embed-voyageai.txt deleted file mode 100644 index a1d3572e8..000000000 --- a/requirements/ingest/embed-voyageai.txt +++ /dev/null @@ -1,147 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/embed-voyageai.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # langchain - # voyageai -aiolimiter==1.1.0 - # via voyageai -aiosignal==1.3.1 - # via aiohttp -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # 
via - # -c ./ingest/../base.txt - # httpx -async-timeout==4.0.3 - # via - # aiohttp - # langchain -attrs==24.2.0 - # via aiohttp -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # langsmith -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx - # requests - # yarl -jsonpatch==1.33 - # via langchain-core -jsonpointer==3.0.0 - # via jsonpatch -langchain==0.3.0 - # via -r ./ingest/embed-voyageai.in -langchain-core==0.3.2 - # via - # langchain - # langchain-text-splitters - # langchain-voyageai -langchain-text-splitters==0.3.0 - # via langchain -langchain-voyageai==0.1.2 - # via -r ./ingest/embed-voyageai.in -langsmith==0.1.125 - # via - # langchain - # langchain-core -multidict==6.1.0 - # via - # aiohttp - # yarl -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # langchain - # voyageai -orjson==3.10.7 - # via langsmith -packaging==24.1 - # via - # -c ./ingest/../base.txt - # langchain-core -pydantic==2.9.2 - # via - # langchain - # langchain-core - # langchain-voyageai - # langsmith -pydantic-core==2.23.4 - # via pydantic -pyyaml==6.0.2 - # via - # langchain - # langchain-core -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # langchain - # langsmith - # voyageai -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -sqlalchemy==2.0.35 - # via langchain -tenacity==8.5.0 - # via - # langchain - # langchain-core - # voyageai -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # langchain-core - # multidict - # pydantic - # pydantic-core - # sqlalchemy -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -voyageai==0.2.3 - # via langchain-voyageai -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/gcs.in b/requirements/ingest/gcs.in deleted file mode 100644 index 842468b00..000000000 --- a/requirements/ingest/gcs.in +++ /dev/null @@ -1,5 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -gcsfs -fsspec -bs4 diff --git a/requirements/ingest/gcs.txt b/requirements/ingest/gcs.txt deleted file mode 100644 index c2954c0b1..000000000 --- a/requirements/ingest/gcs.txt +++ /dev/null @@ -1,120 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/gcs.in -# -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via gcsfs -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # bs4 -bs4==0.0.2 - # via -r ./ingest/gcs.in -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -decorator==5.1.1 - # via gcsfs -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.9.0 - # via - # -r ./ingest/gcs.in - # gcsfs -gcsfs==2024.9.0.post1 - # via -r ./ingest/gcs.in -google-api-core==2.20.0 - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.35.0 - # via - # gcsfs - # google-api-core - # google-auth-oauthlib - # google-cloud-core - # 
google-cloud-storage -google-auth-oauthlib==1.2.1 - # via gcsfs -google-cloud-core==2.4.1 - # via google-cloud-storage -google-cloud-storage==2.18.2 - # via gcsfs -google-crc32c==1.6.0 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 - # via google-cloud-storage -googleapis-common-protos==1.65.0 - # via google-api-core -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests - # yarl -multidict==6.1.0 - # via - # aiohttp - # yarl -oauthlib==3.2.2 - # via requests-oauthlib -proto-plus==1.24.0 - # via google-api-core -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # gcsfs - # google-api-core - # google-cloud-storage - # requests-oauthlib -requests-oauthlib==2.0.0 - # via google-auth-oauthlib -rsa==4.9 - # via google-auth -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/github.in b/requirements/ingest/github.in deleted file mode 100644 index 599585d7a..000000000 --- a/requirements/ingest/github.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -# NOTE - pygithub==1.58.0 fails due to https://github.com/PyGithub/PyGithub/issues/2436 -pygithub>1.58.0 diff --git a/requirements/ingest/github.txt b/requirements/ingest/github.txt deleted file mode 100644 index 0d8fa1ac5..000000000 --- a/requirements/ingest/github.txt +++ /dev/null @@ -1,57 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/github.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography - # pynacl -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # pyjwt -deprecated==1.2.14 - # via pygithub -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pygithub==2.4.0 - # via -r ./ingest/github.in -pyjwt[crypto]==2.9.0 - # via pygithub -pynacl==1.5.0 - # via pygithub -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # pygithub -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # pygithub -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # pygithub - # requests -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated diff --git a/requirements/ingest/gitlab.in b/requirements/ingest/gitlab.in deleted file mode 100644 index 86be2c44b..000000000 --- a/requirements/ingest/gitlab.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -python-gitlab diff --git a/requirements/ingest/gitlab.txt b/requirements/ingest/gitlab.txt deleted file mode 100644 index 77d5743ba..000000000 --- a/requirements/ingest/gitlab.txt +++ /dev/null @@ -1,34 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/gitlab.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c 
./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -python-gitlab==4.11.1 - # via -r ./ingest/gitlab.in -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # python-gitlab - # requests-toolbelt -requests-toolbelt==1.0.0 - # via - # -c ./ingest/../base.txt - # python-gitlab -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/google-drive.in b/requirements/ingest/google-drive.in deleted file mode 100644 index e95e27f71..000000000 --- a/requirements/ingest/google-drive.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -google-api-python-client diff --git a/requirements/ingest/google-drive.txt b/requirements/ingest/google-drive.txt deleted file mode 100644 index 54726088b..000000000 --- a/requirements/ingest/google-drive.txt +++ /dev/null @@ -1,66 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/google-drive.in -# -cachetools==5.5.0 - # via google-auth -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -google-api-core==2.20.0 - # via google-api-python-client -google-api-python-client==2.146.0 - # via -r ./ingest/google-drive.in -google-auth==2.35.0 - # via - # google-api-core - # google-api-python-client - # google-auth-httplib2 -google-auth-httplib2==0.2.0 - # via google-api-python-client -googleapis-common-protos==1.65.0 - # via google-api-core -httplib2==0.22.0 - # via - # google-api-python-client - # google-auth-httplib2 -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -proto-plus==1.24.0 - # via google-api-core -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 - # via google-auth -pyparsing==3.1.4 - # via httplib2 -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # google-api-core -rsa==4.9 - # via google-auth -uritemplate==4.1.1 - # via google-api-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/hubspot.in b/requirements/ingest/hubspot.in deleted file mode 100644 index 033413401..000000000 --- a/requirements/ingest/hubspot.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -hubspot-api-client -urllib3 diff --git a/requirements/ingest/hubspot.txt b/requirements/ingest/hubspot.txt deleted file mode 100644 index da9b63b37..000000000 --- a/requirements/ingest/hubspot.txt +++ /dev/null @@ -1,27 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/hubspot.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # hubspot-api-client -hubspot-api-client==9.0.0 - # via -r ./ingest/hubspot.in -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # hubspot-api-client -six==1.16.0 - # via - # -c ./ingest/../base.txt - # hubspot-api-client - # python-dateutil -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # -r ./ingest/hubspot.in - # hubspot-api-client diff --git a/requirements/ingest/ingest.txt b/requirements/ingest/ingest.txt new file mode 100644 index 000000000..957f788f1 --- /dev/null +++ b/requirements/ingest/ingest.txt @@ 
-0,0 +1,5 @@ +unstructured-ingest[airtable, astradb, azure, azure-cognitive-search, bedrock, biomed, box, chroma, clarifai, confluence, couchbase, databricks-volumes, delta-table, discord, dropbox, elasticsearch, embed-huggingface, embed-octoai, embed-vertexai, embed-voyageai, gcs, github, gitlab, google-drive, hubspot, jira, kafka, kdbai, milvus, mongodb, notion, onedrive, openai, opensearch, outlook, pinecone, postgres, qdrant, reddit, remote, s3, salesforce, sftp, sharepoint, singlestore, slack, vectara, weaviate, wikipedia] +s3fs>=2024.9.0 +urllib3>=1.26.20 +backoff>=2.2.1 +httpx>=0.27.2 diff --git a/requirements/ingest/jira.in b/requirements/ingest/jira.in deleted file mode 100644 index 37f92cb8c..000000000 --- a/requirements/ingest/jira.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -atlassian-python-api diff --git a/requirements/ingest/jira.txt b/requirements/ingest/jira.txt deleted file mode 100644 index 7b2f98861..000000000 --- a/requirements/ingest/jira.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/jira.in -# -atlassian-python-api==3.41.16 - # via -r ./ingest/jira.in -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -deprecated==1.2.14 - # via atlassian-python-api -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -jmespath==1.0.1 - # via atlassian-python-api -oauthlib==3.2.2 - # via - # atlassian-python-api - # requests-oauthlib -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # atlassian-python-api - # requests-oauthlib -requests-oauthlib==2.0.0 - # via atlassian-python-api -six==1.16.0 - # via - # -c ./ingest/../base.txt - # atlassian-python-api -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # deprecated diff --git a/requirements/ingest/kafka.in b/requirements/ingest/kafka.in deleted file mode 100644 index 25b9ad2f6..000000000 --- a/requirements/ingest/kafka.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -confluent-kafka \ No newline at end of file diff --git a/requirements/ingest/kafka.txt b/requirements/ingest/kafka.txt deleted file mode 100644 index 4dbb8306d..000000000 --- a/requirements/ingest/kafka.txt +++ /dev/null @@ -1,8 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/kafka.in -# -confluent-kafka==2.5.3 - # via -r ./ingest/kafka.in diff --git a/requirements/ingest/mongodb.in b/requirements/ingest/mongodb.in deleted file mode 100644 index 48f292290..000000000 --- a/requirements/ingest/mongodb.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -pymongo diff --git a/requirements/ingest/mongodb.txt b/requirements/ingest/mongodb.txt deleted file mode 100644 index 778a13fc4..000000000 --- a/requirements/ingest/mongodb.txt +++ /dev/null @@ -1,10 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/mongodb.in -# -dnspython==2.6.1 - # via pymongo -pymongo==4.9.1 - # via -r ./ingest/mongodb.in diff --git a/requirements/ingest/notion.in 
b/requirements/ingest/notion.in deleted file mode 100644 index 47823a112..000000000 --- a/requirements/ingest/notion.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -notion-client -htmlBuilder diff --git a/requirements/ingest/notion.txt b/requirements/ingest/notion.txt deleted file mode 100644 index ea8a45578..000000000 --- a/requirements/ingest/notion.txt +++ /dev/null @@ -1,49 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/notion.in -# -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -htmlbuilder==1.0.0 - # via -r ./ingest/notion.in -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx==0.27.2 - # via - # -c ./ingest/../base.txt - # notion-client -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -notion-client==2.2.1 - # via -r ./ingest/notion.in -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio diff --git a/requirements/ingest/onedrive.in b/requirements/ingest/onedrive.in deleted file mode 100644 index c53222881..000000000 --- a/requirements/ingest/onedrive.in +++ /dev/null @@ -1,5 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -msal -Office365-REST-Python-Client -bs4 diff --git a/requirements/ingest/onedrive.txt b/requirements/ingest/onedrive.txt deleted file mode 100644 index 88330e86e..000000000 --- a/requirements/ingest/onedrive.txt +++ /dev/null @@ -1,65 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/onedrive.in -# -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # bs4 -bs4==0.0.2 - # via -r ./ingest/onedrive.in -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # msal - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -msal==1.31.0 - # via - # -r ./ingest/onedrive.in - # office365-rest-python-client -office365-rest-python-client==2.5.13 - # via -r ./ingest/onedrive.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -pytz==2024.2 - # via office365-rest-python-client -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # msal - # office365-rest-python-client -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # office365-rest-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/opensearch.in b/requirements/ingest/opensearch.in deleted file mode 100644 index ac336e8d1..000000000 --- a/requirements/ingest/opensearch.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -opensearch-py diff --git a/requirements/ingest/opensearch.txt b/requirements/ingest/opensearch.txt deleted file mode 100644 index 03a011830..000000000 --- a/requirements/ingest/opensearch.txt +++ /dev/null @@ -1,41 +0,0 @@ -# -# This file is autogenerated by pip-compile with 
Python 3.9 -# by the following command: -# -# pip-compile ./ingest/opensearch.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # opensearch-py - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -events==0.5 - # via opensearch-py -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -opensearch-py==2.7.1 - # via -r ./ingest/opensearch.in -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # opensearch-py -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # opensearch-py -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # opensearch-py - # requests diff --git a/requirements/ingest/outlook.in b/requirements/ingest/outlook.in deleted file mode 100644 index 3b65d3029..000000000 --- a/requirements/ingest/outlook.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -msal -Office365-REST-Python-Client diff --git a/requirements/ingest/outlook.txt b/requirements/ingest/outlook.txt deleted file mode 100644 index f73262791..000000000 --- a/requirements/ingest/outlook.txt +++ /dev/null @@ -1,55 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/outlook.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # msal - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -msal==1.31.0 - # via - # -r ./ingest/outlook.in - # office365-rest-python-client -office365-rest-python-client==2.5.13 - # via -r ./ingest/outlook.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -pytz==2024.2 - # via office365-rest-python-client -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # msal - # office365-rest-python-client -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # office365-rest-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/pinecone.in b/requirements/ingest/pinecone.in deleted file mode 100644 index 985accf43..000000000 --- a/requirements/ingest/pinecone.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -pinecone-client>=3.7.1 diff --git a/requirements/ingest/pinecone.txt b/requirements/ingest/pinecone.txt deleted file mode 100644 index 68a6197ff..000000000 --- a/requirements/ingest/pinecone.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/pinecone.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # pinecone-client -pinecone-client==5.0.1 - # via -r ./ingest/pinecone.in -pinecone-plugin-inference==1.1.0 - # via pinecone-client -pinecone-plugin-interface==0.0.7 - # via - # pinecone-client - # pinecone-plugin-inference -tqdm==4.66.5 - # via - # -c ./ingest/../base.txt - # pinecone-client -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # pinecone-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # pinecone-client diff --git a/requirements/ingest/postgres.in b/requirements/ingest/postgres.in deleted file 
mode 100644 index f57ac1a36..000000000 --- a/requirements/ingest/postgres.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -psycopg2-binary diff --git a/requirements/ingest/postgres.txt b/requirements/ingest/postgres.txt deleted file mode 100644 index 813ca1616..000000000 --- a/requirements/ingest/postgres.txt +++ /dev/null @@ -1,8 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/postgres.in -# -psycopg2-binary==2.9.9 - # via -r ./ingest/postgres.in diff --git a/requirements/ingest/qdrant.in b/requirements/ingest/qdrant.in deleted file mode 100644 index 051f54715..000000000 --- a/requirements/ingest/qdrant.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -qdrant-client diff --git a/requirements/ingest/qdrant.txt b/requirements/ingest/qdrant.txt deleted file mode 100644 index 0ea8c17ae..000000000 --- a/requirements/ingest/qdrant.txt +++ /dev/null @@ -1,86 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/qdrant.in -# -annotated-types==0.7.0 - # via pydantic -anyio==4.5.0 - # via - # -c ./ingest/../base.txt - # httpx -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # httpcore - # httpx -exceptiongroup==1.2.2 - # via - # -c ./ingest/../base.txt - # anyio -grpcio==1.66.1 - # via - # -c ./ingest/../deps/constraints.txt - # grpcio-tools - # qdrant-client -grpcio-tools==1.62.3 - # via qdrant-client -h11==0.14.0 - # via - # -c ./ingest/../base.txt - # httpcore -h2==4.1.0 - # via httpx -hpack==4.0.0 - # via h2 -httpcore==1.0.5 - # via - # -c ./ingest/../base.txt - # httpx -httpx[http2]==0.27.2 - # via - # -c ./ingest/../base.txt - # qdrant-client -hyperframe==6.0.1 - # via h2 -idna==3.10 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -numpy==1.26.4 - # via - # -c ./ingest/../base.txt - # qdrant-client -portalocker==2.10.1 - # via qdrant-client -protobuf==4.25.5 - # via - # -c ./ingest/../deps/constraints.txt - # grpcio-tools -pydantic==2.9.2 - # via qdrant-client -pydantic-core==2.23.4 - # via pydantic -qdrant-client==1.11.2 - # via -r ./ingest/qdrant.in -sniffio==1.3.1 - # via - # -c ./ingest/../base.txt - # anyio - # httpx -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # anyio - # pydantic - # pydantic-core -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # qdrant-client - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/ingest/reddit.in b/requirements/ingest/reddit.in deleted file mode 100644 index 5fa199c8c..000000000 --- a/requirements/ingest/reddit.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -praw diff --git a/requirements/ingest/reddit.txt b/requirements/ingest/reddit.txt deleted file mode 100644 index 2d5bd0302..000000000 --- a/requirements/ingest/reddit.txt +++ /dev/null @@ -1,36 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/reddit.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -praw==7.7.1 - # via -r ./ingest/reddit.in -prawcore==2.4.0 - # via praw -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # prawcore - # update-checker -update-checker==0.18.0 
- # via praw -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -websocket-client==1.8.0 - # via praw diff --git a/requirements/ingest/s3.in b/requirements/ingest/s3.in deleted file mode 100644 index 43c7b2ecf..000000000 --- a/requirements/ingest/s3.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -s3fs -fsspec diff --git a/requirements/ingest/s3.txt b/requirements/ingest/s3.txt deleted file mode 100644 index 98bb2313b..000000000 --- a/requirements/ingest/s3.txt +++ /dev/null @@ -1,70 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/s3.in -# -aiobotocore==2.13.3 - # via s3fs -aiohappyeyeballs==2.4.0 - # via aiohttp -aiohttp==3.10.5 - # via - # aiobotocore - # s3fs -aioitertools==0.12.0 - # via aiobotocore -aiosignal==1.3.1 - # via aiohttp -async-timeout==4.0.3 - # via aiohttp -attrs==24.2.0 - # via aiohttp -botocore==1.34.131 - # via - # -c ./ingest/../deps/constraints.txt - # aiobotocore -frozenlist==1.4.1 - # via - # aiohttp - # aiosignal -fsspec==2024.9.0 - # via - # -r ./ingest/s3.in - # s3fs -idna==3.10 - # via - # -c ./ingest/../base.txt - # yarl -jmespath==1.0.1 - # via botocore -multidict==6.1.0 - # via - # aiohttp - # yarl -python-dateutil==2.9.0.post0 - # via - # -c ./ingest/../base.txt - # botocore -s3fs==2024.9.0 - # via -r ./ingest/s3.in -six==1.16.0 - # via - # -c ./ingest/../base.txt - # python-dateutil -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # aioitertools - # multidict -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # botocore -wrapt==1.16.0 - # via - # -c ./ingest/../base.txt - # aiobotocore -yarl==1.11.1 - # via aiohttp diff --git a/requirements/ingest/salesforce.in b/requirements/ingest/salesforce.in deleted file mode 100644 index 69547e5dd..000000000 --- a/requirements/ingest/salesforce.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -simple-salesforce diff --git a/requirements/ingest/salesforce.txt b/requirements/ingest/salesforce.txt deleted file mode 100644 index a87fe3bf4..000000000 --- a/requirements/ingest/salesforce.txt +++ /dev/null @@ -1,76 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/salesforce.in -# -attrs==24.2.0 - # via zeep -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -isodate==0.6.1 - # via zeep -lxml==5.3.0 - # via - # -c ./ingest/../base.txt - # zeep -more-itertools==10.5.0 - # via simple-salesforce -platformdirs==4.3.6 - # via zeep -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via simple-salesforce -pytz==2024.2 - # via zeep -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # requests-file - # requests-toolbelt - # simple-salesforce - # zeep -requests-file==2.1.0 - # via zeep -requests-toolbelt==1.0.0 - # via - # -c ./ingest/../base.txt - # zeep -simple-salesforce==1.12.6 - # via -r ./ingest/salesforce.in -six==1.16.0 - # via - # -c ./ingest/../base.txt - # isodate -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # simple-salesforce -urllib3==1.26.20 - # 
via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -zeep==4.2.1 - # via simple-salesforce diff --git a/requirements/ingest/sftp.in b/requirements/ingest/sftp.in deleted file mode 100644 index e91c3eb34..000000000 --- a/requirements/ingest/sftp.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -fsspec -paramiko diff --git a/requirements/ingest/sftp.txt b/requirements/ingest/sftp.txt deleted file mode 100644 index 149af18f6..000000000 --- a/requirements/ingest/sftp.txt +++ /dev/null @@ -1,27 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/sftp.in -# -bcrypt==4.2.0 - # via paramiko -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography - # pynacl -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # paramiko -fsspec==2024.9.0 - # via -r ./ingest/sftp.in -paramiko==3.5.0 - # via -r ./ingest/sftp.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pynacl==1.5.0 - # via paramiko diff --git a/requirements/ingest/sharepoint.in b/requirements/ingest/sharepoint.in deleted file mode 100644 index 3b65d3029..000000000 --- a/requirements/ingest/sharepoint.in +++ /dev/null @@ -1,4 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -msal -Office365-REST-Python-Client diff --git a/requirements/ingest/sharepoint.txt b/requirements/ingest/sharepoint.txt deleted file mode 100644 index 324fc52dd..000000000 --- a/requirements/ingest/sharepoint.txt +++ /dev/null @@ -1,55 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/sharepoint.in -# -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # msal - # pyjwt -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -msal==1.31.0 - # via - # -r ./ingest/sharepoint.in - # office365-rest-python-client -office365-rest-python-client==2.5.13 - # via -r ./ingest/sharepoint.in -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -pyjwt[crypto]==2.9.0 - # via msal -pytz==2024.2 - # via office365-rest-python-client -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # msal - # office365-rest-python-client -typing-extensions==4.12.2 - # via - # -c ./ingest/../base.txt - # office365-rest-python-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests diff --git a/requirements/ingest/singlestore.in b/requirements/ingest/singlestore.in deleted file mode 100644 index 5a7e51c28..000000000 --- a/requirements/ingest/singlestore.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -singlestoredb diff --git a/requirements/ingest/singlestore.txt b/requirements/ingest/singlestore.txt deleted file mode 100644 index 226285577..000000000 --- a/requirements/ingest/singlestore.txt +++ /dev/null @@ -1,62 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/singlestore.in -# -build==1.2.2 - # via singlestoredb -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -importlib-metadata==8.5.0 - # via - # -c 
./ingest/../deps/constraints.txt - # build -packaging==24.1 - # via - # -c ./ingest/../base.txt - # build -parsimonious==0.10.0 - # via singlestoredb -pyjwt==2.9.0 - # via singlestoredb -pyproject-hooks==1.1.0 - # via build -regex==2024.9.11 - # via - # -c ./ingest/../base.txt - # parsimonious -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # singlestoredb -singlestoredb==1.6.3 - # via -r ./ingest/singlestore.in -sqlparams==6.1.0 - # via singlestoredb -tomli==2.0.1 - # via - # build - # singlestoredb -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wheel==0.44.0 - # via singlestoredb -zipp==3.20.2 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/requirements/ingest/slack.in b/requirements/ingest/slack.in deleted file mode 100644 index 5eaa80bc7..000000000 --- a/requirements/ingest/slack.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -slack_sdk diff --git a/requirements/ingest/slack.txt b/requirements/ingest/slack.txt deleted file mode 100644 index f518e3e81..000000000 --- a/requirements/ingest/slack.txt +++ /dev/null @@ -1,8 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/slack.in -# -slack-sdk==3.33.1 - # via -r ./ingest/slack.in diff --git a/requirements/ingest/weaviate.in b/requirements/ingest/weaviate.in deleted file mode 100644 index e487fcead..000000000 --- a/requirements/ingest/weaviate.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -weaviate-client diff --git a/requirements/ingest/weaviate.txt b/requirements/ingest/weaviate.txt deleted file mode 100644 index 8c457917f..000000000 --- a/requirements/ingest/weaviate.txt +++ /dev/null @@ -1,45 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/weaviate.in -# -authlib==1.3.2 - # via weaviate-client -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -cffi==1.17.1 - # via - # -c ./ingest/../base.txt - # cryptography -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests -cryptography==43.0.1 - # via - # -c ./ingest/../base.txt - # authlib -idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -pycparser==2.22 - # via - # -c ./ingest/../base.txt - # cffi -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # weaviate-client -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -validators==0.34.0 - # via weaviate-client -weaviate-client==3.26.7 - # via -r ./ingest/weaviate.in diff --git a/requirements/ingest/wikipedia.in b/requirements/ingest/wikipedia.in deleted file mode 100644 index fb68f0930..000000000 --- a/requirements/ingest/wikipedia.in +++ /dev/null @@ -1,3 +0,0 @@ --c ../deps/constraints.txt --c ../base.txt -wikipedia diff --git a/requirements/ingest/wikipedia.txt b/requirements/ingest/wikipedia.txt deleted file mode 100644 index 790b132de..000000000 --- a/requirements/ingest/wikipedia.txt +++ /dev/null @@ -1,37 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile ./ingest/wikipedia.in -# -beautifulsoup4==4.12.3 - # via - # -c ./ingest/../base.txt - # wikipedia -certifi==2024.8.30 - # via - # -c ./ingest/../base.txt - # requests -charset-normalizer==3.3.2 - # via - # -c ./ingest/../base.txt - # requests 
-idna==3.10 - # via - # -c ./ingest/../base.txt - # requests -requests==2.32.3 - # via - # -c ./ingest/../base.txt - # wikipedia -soupsieve==2.6 - # via - # -c ./ingest/../base.txt - # beautifulsoup4 -urllib3==1.26.20 - # via - # -c ./ingest/../base.txt - # -c ./ingest/../deps/constraints.txt - # requests -wikipedia==1.4.0 - # via -r ./ingest/wikipedia.in diff --git a/requirements/test.txt b/requirements/test.txt index 9585aaa4d..6c9660091 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -6,7 +6,7 @@ # annotated-types==0.7.0 # via pydantic -anyio==4.5.0 +anyio==4.6.0 # via # -c ./base.txt # httpx @@ -50,7 +50,7 @@ flake8-print==5.0.0 # via -r ./test.in freezegun==1.5.1 # via -r ./test.in -grpcio==1.66.1 +grpcio==1.66.2 # via # -c ././deps/constraints.txt # -r ./test.in @@ -58,7 +58,7 @@ h11==0.14.0 # via # -c ./base.txt # httpcore -httpcore==1.0.5 +httpcore==1.0.6 # via # -c ./base.txt # httpx @@ -115,7 +115,7 @@ packaging==24.1 # -c ./base.txt # black # pytest -pandas==2.2.2 +pandas==2.2.3 # via label-studio-sdk pathspec==0.12.1 # via black @@ -185,7 +185,7 @@ sniffio==1.3.1 # httpx toml==0.10.2 # via liccheck -tomli==2.0.1 +tomli==2.0.2 # via # autoflake # black @@ -216,7 +216,7 @@ typing-extensions==4.12.2 # mypy # pydantic # pydantic-core -tzdata==2024.1 +tzdata==2024.2 # via pandas ujson==5.10.0 # via label-studio-sdk @@ -234,7 +234,7 @@ wrapt==1.16.0 # vcrpy xmljson==0.2.1 # via label-studio-sdk -yarl==1.11.1 +yarl==1.13.1 # via vcrpy # The following packages are considered to be unsafe in a requirements file: diff --git a/scripts/airtable-test-helpers/create_scale_test_components.py b/scripts/airtable-test-helpers/create_scale_test_components.py index af279457c..e2e11cb86 100644 --- a/scripts/airtable-test-helpers/create_scale_test_components.py +++ b/scripts/airtable-test-helpers/create_scale_test_components.py @@ -3,7 +3,7 @@ import os # import pyairtable as pyair from pyairtable import Api -from unstructured.ingest.logger import logger +from unstructured.logger import logger SCALE_TEST_NUMBER_OF_RECORDS = 20_000 diff --git a/scripts/consistent-deps.sh b/scripts/consistent-deps.sh index f210c6acb..81c772894 100755 --- a/scripts/consistent-deps.sh +++ b/scripts/consistent-deps.sh @@ -16,7 +16,7 @@ function join_by { } # NOTE(alan): Add any dependency files here we don't want to include in the resolution. -excludefiles=("requirements//build.txt") +excludefiles=("requirements/ingest/ingest.txt") # Build an array of requirements files. shopt -s nullglob @@ -39,6 +39,8 @@ reqstring=$(join_by ' -r ' "${reqfiles[@]}") reqstring="-r ${reqstring}" # This pip command will attempt to resolve the dependencies without installing anything. pipcommand="pip install --dry-run --ignore-installed ${reqstring}" +echo "dry run install of the following req files:" +echo "${pipcommand}" if $pipcommand >>/dev/null; then echo "Everything looks fine!" 
else diff --git a/setup.py b/setup.py index 89813f7c1..3b698e12e 100644 --- a/setup.py +++ b/setup.py @@ -121,58 +121,10 @@ setup( "rst": rst_reqs, "tsv": tsv_reqs, "xlsx": xlsx_reqs, - # Extra requirements for data connectors - "airtable": load_requirements("requirements/ingest/airtable.in"), - "astradb": load_requirements("requirements/ingest/astradb.in"), - "azure": load_requirements("requirements/ingest/azure.in"), - "azure-cognitive-search": load_requirements( - "requirements/ingest/azure-cognitive-search.in", - ), - "biomed": load_requirements("requirements/ingest/biomed.in"), - "box": load_requirements("requirements/ingest/box.in"), - "chroma": load_requirements("requirements/ingest/chroma.in"), - "clarifai": load_requirements("requirements/ingest/clarifai.in"), - "confluence": load_requirements("requirements/ingest/confluence.in"), - "delta-table": load_requirements("requirements/ingest/delta-table.in"), - "discord": load_requirements("requirements/ingest/discord.in"), - "dropbox": load_requirements("requirements/ingest/dropbox.in"), - "elasticsearch": load_requirements("requirements/ingest/elasticsearch.in"), - "gcs": load_requirements("requirements/ingest/gcs.in"), - "github": load_requirements("requirements/ingest/github.in"), - "gitlab": load_requirements("requirements/ingest/gitlab.in"), - "google-drive": load_requirements("requirements/ingest/google-drive.in"), - "hubspot": load_requirements("requirements/ingest/hubspot.in"), - "jira": load_requirements("requirements/ingest/jira.in"), - "kafka": load_requirements("requirements/ingest/kafka.in"), - "mongodb": load_requirements("requirements/ingest/mongodb.in"), - "notion": load_requirements("requirements/ingest/notion.in"), - "onedrive": load_requirements("requirements/ingest/onedrive.in"), - "opensearch": load_requirements("requirements/ingest/opensearch.in"), - "outlook": load_requirements("requirements/ingest/outlook.in"), - "pinecone": load_requirements("requirements/ingest/pinecone.in"), - "postgres": load_requirements("requirements/ingest/postgres.in"), - "qdrant": load_requirements("requirements/ingest/qdrant.in"), - "reddit": load_requirements("requirements/ingest/reddit.in"), - "s3": load_requirements("requirements/ingest/s3.in"), - "sharepoint": load_requirements("requirements/ingest/sharepoint.in"), - "salesforce": load_requirements("requirements/ingest/salesforce.in"), - "sftp": load_requirements("requirements/ingest/sftp.in"), - "slack": load_requirements("requirements/ingest/slack.in"), - "wikipedia": load_requirements("requirements/ingest/wikipedia.in"), - "weaviate": load_requirements("requirements/ingest/weaviate.in"), # Legacy extra requirements "huggingface": load_requirements("requirements/huggingface.in"), "local-inference": all_doc_reqs, "paddleocr": load_requirements("requirements/extra-paddleocr.in"), - "embed-huggingface": load_requirements("requirements/ingest/embed-huggingface.in"), - "embed-mixedbreadai": load_requirements("requirements/ingest/embed-mixedbreadai.in"), - "embed-octoai": load_requirements("requirements/ingest/embed-octoai.in"), - "embed-vertexai": load_requirements("requirements/ingest/embed-vertexai.in"), - "embed-voyageai": load_requirements("requirements/ingest/embed-voyageai.in"), - "openai": load_requirements("requirements/ingest/embed-openai.in"), - "bedrock": load_requirements("requirements/ingest/embed-aws-bedrock.in"), - "databricks-volumes": load_requirements("requirements/ingest/databricks-volumes.in"), - "singlestore": 
load_requirements("requirements/ingest/singlestore.in"), }, package_dir={"unstructured": "unstructured"}, package_data={"unstructured": ["nlp/*.txt", "py.typed"]}, diff --git a/test_unstructured/embed/test_mixedbreadai.py b/test_unstructured/embed/test_mixedbreadai.py index 015342677..0121d3d48 100644 --- a/test_unstructured/embed/test_mixedbreadai.py +++ b/test_unstructured/embed/test_mixedbreadai.py @@ -22,8 +22,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client.embeddings.side_effect = mock_embeddings - # Mock create_client to return our mock_client - mocker.patch.object(MixedbreadAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(MixedbreadAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = MixedbreadAIEmbeddingEncoder( config=MixedbreadAIEmbeddingConfig( diff --git a/test_unstructured/embed/test_octoai.py b/test_unstructured/embed/test_octoai.py index df9b302e4..6b237ff84 100644 --- a/test_unstructured/embed/test_octoai.py +++ b/test_unstructured/embed/test_octoai.py @@ -7,8 +7,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client = mocker.MagicMock() mock_client.embed_documents.return_value = [1, 2] - # Mock create_client to return our mock_client - mocker.patch.object(OctoAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(OctoAiEmbeddingConfig, "get_client", return_value=mock_client) encoder = OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(api_key="api_key")) elements = encoder.embed_documents( diff --git a/test_unstructured/embed/test_openai.py b/test_unstructured/embed/test_openai.py index 7d37257b8..39148a454 100644 --- a/test_unstructured/embed/test_openai.py +++ b/test_unstructured/embed/test_openai.py @@ -7,8 +7,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client = mocker.MagicMock() mock_client.embed_documents.return_value = [1, 2] - # Mock create_client to return our mock_client - mocker.patch.object(OpenAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(OpenAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(api_key="api_key")) elements = encoder.embed_documents( diff --git a/test_unstructured/embed/test_vertexai.py b/test_unstructured/embed/test_vertexai.py index f754b19a1..3899a1994 100644 --- a/test_unstructured/embed/test_vertexai.py +++ b/test_unstructured/embed/test_vertexai.py @@ -8,7 +8,7 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client.embed_documents.return_value = [1, 2] # Mock create_client to return our mock_client - mocker.patch.object(VertexAIEmbeddingEncoder, "create_client", return_value=mock_client) + mocker.patch.object(VertexAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(api_key="api_key")) elements = encoder.embed_documents( diff --git a/test_unstructured/embed/test_voyageai.py b/test_unstructured/embed/test_voyageai.py index cd4bd0551..b759e6153 100644 --- a/test_unstructured/embed/test_voyageai.py +++ b/test_unstructured/embed/test_voyageai.py @@ -7,8 +7,8 @@ def test_embed_documents_does_not_break_element_to_dict(mocker): mock_client = mocker.MagicMock() mock_client.embed_documents.return_value = [1, 2] - # Mock create_client to 
return our mock_client - mocker.patch.object(VoyageAIEmbeddingEncoder, "create_client", return_value=mock_client) + # Mock get_client to return our mock_client + mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client) encoder = VoyageAIEmbeddingEncoder( config=VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-law-2") diff --git a/test_unstructured/ingest/utils/test_compression.py b/test_unstructured/ingest/utils/test_compression.py deleted file mode 100644 index 7699a385e..000000000 --- a/test_unstructured/ingest/utils/test_compression.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -import tarfile - -from unstructured.ingest.utils.compression import uncompress_tar_file - - -def test_uncompress_tar_file(tmpdir): - tar_filename = os.path.join(tmpdir, "test.tar") - filename = "example-docs/fake-text.txt" - - with tarfile.open(tar_filename, "w:gz") as tar: - tar.add(filename, arcname=os.path.basename(filename)) - - path = uncompress_tar_file(tar_filename, path=tmpdir.dirname) - assert path == tmpdir.dirname diff --git a/test_unstructured/test_utils.py b/test_unstructured/test_utils.py index 8d8f5a7eb..487b98b2c 100644 --- a/test_unstructured/test_utils.py +++ b/test_unstructured/test_utils.py @@ -2,7 +2,6 @@ from __future__ import annotations import json import os -import re import pytest @@ -313,32 +312,6 @@ def test_catch_overlapping_and_nested_bboxes_non_overlapping_case(): assert overlapping_cases == [] -def test_validate_data_args(): - assert utils.validate_date_args("2020-10-10") is True - - with pytest.raises(ValueError): - utils.validate_date_args("blah") - - with pytest.raises(ValueError): - utils.validate_date_args(None) - - -@pytest.mark.parametrize( - "date", ["1990-12-01", "2050-01-01T00:00:00", "2050-01-01+00:00:00", "2022-02-12T14:30:00-0500"] -) -def test_validate_date_args_accepts_standard_formats(date): - assert utils.validate_date_args(date) - - -@pytest.mark.parametrize("date", [None, "not a date", "1990-12-33"]) -def test_validate_date_args_raises_for_invalid_formats(date): - pattern1 = re.compile(r"The argument.*?(?:is None).*") - pattern2 = re.compile(r"The argument.*?(?:does not satisfy the format: YYYY-MM-DD).*") - combined_pattern = re.compile(f"({pattern1.pattern}|{pattern2.pattern})") - with pytest.raises(ValueError, match=combined_pattern): - assert utils.validate_date_args(date) - - def test_only_returns_singleton_iterable(): singleton_iterable = [42] result = utils.only(singleton_iterable) diff --git a/test_unstructured_ingest/dest/astradb.sh b/test_unstructured_ingest/dest/astradb.sh deleted file mode 100755 index 77fc0e25e..000000000 --- a/test_unstructured_ingest/dest/astradb.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=astradb-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -if [ -z "$ASTRA_DB_APPLICATION_TOKEN" ]; then - echo "Skipping Astra DB ingest test because ASTRA_DB_APPLICATION_TOKEN env var is not set." - exit 0 -fi - -if [ -z "$ASTRA_DB_API_ENDPOINT" ]; then - echo "Skipping Astra DB ingest test because ASTRA_DB_API_ENDPOINT env var is not set." 
- exit 0 -fi - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) -COLLECTION_NAME="astradb_test_output_$RANDOM_SUFFIX" -EMBEDDING_DIMENSION=384 - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \ - --token "$ASTRA_DB_APPLICATION_TOKEN" \ - --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ - --collection-name "$COLLECTION_NAME" down -} - -trap cleanup EXIT - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/book-war-and-peace-1p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-max-characters 1500 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - astradb \ - --token "$ASTRA_DB_APPLICATION_TOKEN" \ - --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ - --collection-name "$COLLECTION_NAME" \ - --embedding-dimension "$EMBEDDING_DIMENSION" \ - --requested-indexing-policy '{"deny": ["metadata"]}' - -python "$SCRIPT_DIR"/python/test-ingest-astradb-output.py \ - --token "$ASTRA_DB_APPLICATION_TOKEN" \ - --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ - --collection-name "$COLLECTION_NAME" check diff --git a/test_unstructured_ingest/dest/azure-cognitive-search.sh b/test_unstructured_ingest/dest/azure-cognitive-search.sh deleted file mode 100755 index 8b534939f..000000000 --- a/test_unstructured_ingest/dest/azure-cognitive-search.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_FOLDER_NAME=azure-cog-search-dest -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -UPLOAD_DIR=$WORK_DIR/upload_stage -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -AZURE_SEARCH_ENDPOINT="https://ingest-test-azure-cognitive-search.search.windows.net" - -random_id=$(uuidgen) -# index name must be all lowercase -random_id=$(echo "$random_id" | tr '[:upper:]' '[:lower:]') -DESTINATION_INDEX="utic-test-ingest-fixtures-output-$random_id" -# The vector configs on the schema currently only exist on versions: -# 2023-07-01-Preview, 2021-04-30-Preview, 2020-06-30-Preview -API_VERSION=2023-07-01-Preview - -if [ -z "$AZURE_SEARCH_API_KEY" ] || [ -z "$AZURE_SEARCH_ENDPOINT" ]; then - echo "Skipping Azure Cognitive Search ingest test because AZURE_SEARCH_API_KEY or AZURE_SEARCH_ENDPOINT env var is not set." 
- exit 8 -fi - -endpoint="$AZURE_SEARCH_ENDPOINT/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" -echo "Connecting to endpoint: $endpoint" -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - response_code=$(curl -s -o /dev/null -w "%{http_code}" \ - "$endpoint" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json') - if [ "$response_code" == "200" ]; then - echo "deleting index $DESTINATION_INDEX" - curl -X DELETE \ - "$endpoint" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' - else - echo "Index $DESTINATION_INDEX does not exist, nothing to delete" - fi - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" -} - -trap cleanup EXIT - -# Create index -echo "Creating index $DESTINATION_INDEX" -response=$(curl -X PUT -s -w "\n%{http_code}" \ - "$endpoint" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' \ - --data "@$SCRIPT_DIR/files/azure_cognitive_index_schema.json") -response_code=$(echo "$response" | tail -n 1) # get the last line -content=$(echo "$response" | head -n 1) # get the first line -if [ "$response_code" -lt 400 ]; then - echo "Index creation success: $response_code" -else - echo "Index creation failure [$response_code]: $content" - exit 1 -fi - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 150 \ - --chunk-new-after-n-chars 1500 \ - --chunk-max-characters 2500 \ - --chunk-multipage-sections \ - --chunk-no-include-orig-elements \ - --embedding-provider "langchain-huggingface" \ - azure-cognitive-search \ - --key "$AZURE_SEARCH_API_KEY" \ - --endpoint "$AZURE_SEARCH_ENDPOINT" \ - --index "$DESTINATION_INDEX" - -# It can take some time for the index to catch up with the content that was written, this check between 10s sleeps -# to give it that time process the writes. Will timeout after checking for a minute. -docs_count_remote=0 -attempt=1 -while [ "$docs_count_remote" -eq 0 ] && [ "$attempt" -lt 6 ]; do - echo "attempt $attempt: sleeping 10 seconds to let index finish catching up after writes" - sleep 10 - - # Check the contents of the index - docs_count_remote=$(curl "$AZURE_SEARCH_ENDPOINT/indexes/$DESTINATION_INDEX/docs/\$count?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' | jq) - - echo "docs count pulled from Azure Cognitive Search: $docs_count_remote" - - attempt=$((attempt + 1)) -done - -docs_count_local=0 -for i in $(jq length "$UPLOAD_DIR"/*.json); do - docs_count_local=$((docs_count_local + i)) -done - -if [ "$docs_count_remote" -ne "$docs_count_local" ]; then - echo "Number of docs in Azure Cognitive Search $docs_count_remote doesn't match the expected docs: $docs_count_local" - exit 1 -fi diff --git a/test_unstructured_ingest/dest/azure.sh b/test_unstructured_ingest/dest/azure.sh deleted file mode 100755 index 208b4a5a4..000000000 --- a/test_unstructured_ingest/dest/azure.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=azure-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -if [ -z "$AZURE_DEST_CONNECTION_STR" ]; then - echo "Skipping Azure destination ingest test because the AZURE_DEST_CONNECTION_STR env var is not set." - exit 8 -fi - -CONTAINER=utic-ingest-test-fixtures-output -DIRECTORY=$(uuidgen) -REMOTE_URL_RAW="$CONTAINER/$DIRECTORY/" -REMOTE_URL="abfs://$REMOTE_URL_RAW" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-azure-output.py down \ - --connection-string "$AZURE_DEST_CONNECTION_STR" \ - --container "$CONTAINER" \ - --blob-path "$DIRECTORY" - -} -trap cleanup EXIT - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - azure \ - --overwrite \ - --remote-url "$REMOTE_URL" \ - --connection-string "$AZURE_DEST_CONNECTION_STR" - -# Simply check the number of files uploaded -python "$SCRIPT_DIR"/python/test-azure-output.py check \ - --expected-files 1 \ - --connection-string "$AZURE_DEST_CONNECTION_STR" \ - --container "$CONTAINER" \ - --blob-path "$DIRECTORY" diff --git a/test_unstructured_ingest/dest/box.sh b/test_unstructured_ingest/dest/box.sh deleted file mode 100755 index 37ad702dd..000000000 --- a/test_unstructured_ingest/dest/box.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash -#TODO currently box api/sdk does not work to create folders and check for content similar to other fsspec ingest tests - -# -#set -e -# -#DEST_PATH=$(dirname "$(realpath "$0")") -#SCRIPT_DIR=$(dirname "$DEST_PATH") -#cd "$SCRIPT_DIR"/.. || exit 1 -#OUTPUT_FOLDER_NAME=box-dest -#OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -#WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -#max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -#DESTINATION_BOX="box://utic-dev-tech-fixtures/utic-ingest-test-fixtures-output/$(uuidgen)/" -# -#CI=${CI:-"false"} -# -#if [ -z "$BOX_APP_CONFIG" ] && [ -z "$BOX_APP_CONFIG_PATH" ]; then -# echo "Skipping Box ingest test because neither BOX_APP_CONFIG nor BOX_APP_CONFIG_PATH env vars are set." 
-# exit 0 -#fi -# -#if [ -z "$BOX_APP_CONFIG_PATH" ]; then -# # Create temporary service key file -# BOX_APP_CONFIG_PATH=$(mktemp) -# echo "$BOX_APP_CONFIG" >"$BOX_APP_CONFIG_PATH" -#fi -# -## shellcheck disable=SC1091 -#source "$SCRIPT_DIR"/cleanup.sh -#function cleanup() { -# cleanup_dir "$OUTPUT_DIR" -# cleanup_dir "$WORK_DIR" -# if [ "$CI" == "true" ]; then -# cleanup_dir "$DOWNLOAD_DIR" -# fi -#} -#trap cleanup EXIT -# -#RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -#PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ -# local \ -# --num-processes "$max_processes" \ -# --output-dir "$OUTPUT_DIR" \ -# --strategy fast \ -# --verbose \ -# --reprocess \ -# --input-path example-docs/pdf/fake-memo.pdf \ -# --work-dir "$WORK_DIR" \ -# box \ -# --box-app-config "$BOX_APP_CONFIG_PATH" \ -# --remote-url "$DESTINATION_BOX" \ -# -## Simply check the number of files uploaded -#expected_num_files=1 diff --git a/test_unstructured_ingest/dest/chroma.sh b/test_unstructured_ingest/dest/chroma.sh deleted file mode 100755 index 926cb4380..000000000 --- a/test_unstructured_ingest/dest/chroma.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=chroma-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -DESTINATION_PATH=$SCRIPT_DIR/chroma-dest -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -CI=${CI:-"false"} - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -COLLECTION_NAME="chroma-test-output-$RANDOM_SUFFIX" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - # Kill chroma background process - pgrep -f chroma-dest | xargs kill - cleanup_dir "$DESTINATION_PATH" - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -# Run chroma from different script so it can be forced into background -scripts/chroma-test-helpers/create-and-check-chroma.sh "$DESTINATION_PATH" -wait -sleep 5 - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/book-war-and-peace-1p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-max-characters 1500 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - chroma \ - --host "localhost" \ - --port 8000 \ - --collection-name "$COLLECTION_NAME" \ - --tenant "default_tenant" \ - --database "default_database" \ - --batch-size 80 - -python "$SCRIPT_DIR"/python/test-ingest-chroma-output.py --collection-name "$COLLECTION_NAME" diff --git a/test_unstructured_ingest/dest/clarifai.sh b/test_unstructured_ingest/dest/clarifai.sh deleted file mode 100755 index 2ed046aae..000000000 --- a/test_unstructured_ingest/dest/clarifai.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=clarifai-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -if [ -z "$CLARIFAI_API_KEY" ]; then - echo "Skipping Clarifai ingest test because CLARIFAI_API_KEY env var is not set." - exit 0 - -fi - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) -# Set the variables with default values -USER_ID="unstructured" -APP_ID="test-app-unstructured-$RANDOM_SUFFIX" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Get response code to check if app really exists - response_code=$(curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --request GET "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID" \ - --header "Authorization: Key $CLARIFAI_API_KEY") - - # Cleanup (delete) index if it exists - if [ "$response_code" == "200" ]; then - echo "" - echo "deleting clarifai app $APP_ID" - curl --request DELETE "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID" \ - -H "Authorization: Key $CLARIFAI_API_KEY" - - else - echo "There was an error during deletion of clarifai app $APP_ID, with response code: $response_code. App might not exists in your account." - fi - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" -} - -trap cleanup EXIT - -echo "Creating Clarifai app $APP_ID" -response_code=$( - curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --location --request POST "https://api.clarifai.com/v2/users/$USER_ID/apps/" \ - --header "Content-Type: application/json" \ - --header "Authorization: Key $CLARIFAI_API_KEY" \ - --data-raw "{\"apps\": [{\"id\": \"$APP_ID\", \"default_workflow_id\": \"Universal\"}]}" -) -if [ "$response_code" -lt 400 ]; then - echo "App created successfully: $APP_ID" -else - echo "Failed to create app $APP_ID: $response_code" - exit 1 -fi - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --input-path example-docs/book-war-and-peace-1p.txt \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --chunking-strategy by_title \ - --num-processes "$max_processes" \ - --work-dir "$WORK_DIR" \ - --verbose \ - clarifai \ - --app-id "$APP_ID" \ - --user-id "$USER_ID" \ - --api-key "$CLARIFAI_API_KEY" \ - --batch-size 100 - -no_of_inputs=0 -sleep_time=5 - -max_retries=10 -retry_count=0 - -while [ "$no_of_inputs" -eq 0 ]; do - echo "checking for no of inputs in clarifai app" - sleep $sleep_time - - if [ "$retry_count" -eq "$max_retries" ]; then - echo "Reached maximum retries limit. Exiting..." - break - fi - - resp=$(curl \ - -s GET "https://api.clarifai.com/v2/users/$USER_ID/apps/$APP_ID/inputs/status" \ - -H "Authorization: Key $CLARIFAI_API_KEY") - - no_of_inputs=$(echo "$resp" | jq -r '.counts.processed' | sed 's/\x1b\[[0-9;]*m//g') - echo "Processed count: $no_of_inputs" - retry_count=$((retry_count + 1)) - -done - -EXPECTED=8 - -if [ "$no_of_inputs" -ne "$EXPECTED" ]; then - echo "Number of inputs in the clarifai app $APP_ID is not equal to expected. Test failed." - exit 1 - -fi diff --git a/test_unstructured_ingest/dest/databricks-volumes.sh b/test_unstructured_ingest/dest/databricks-volumes.sh deleted file mode 100755 index 6cf6e38a2..000000000 --- a/test_unstructured_ingest/dest/databricks-volumes.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=databricks-volumes -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -DESTINATION_PATH=$SCRIPT_DIR/databricks-volumes -CI=${CI:-"false"} - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -DATABRICKS_VOLUME="test-platform" -DATABRICKS_VOLUME_PATH="databricks-volumes-test-output-$RANDOM_SUFFIX" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - python "$SCRIPT_DIR"/python/test-databricks-volumes.py cleanup \ - --host "$DATABRICKS_HOST" \ - --username "$DATABRICKS_USERNAME" \ - --password "$DATABRICKS_PASSWORD" \ - --volume "$DATABRICKS_VOLUME" \ - --catalog "$DATABRICKS_CATALOG" \ - --volume-path "$DATABRICKS_VOLUME_PATH" - - cleanup_dir "$DESTINATION_PATH" - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - databricks-volumes \ - --host "$DATABRICKS_HOST" \ - --username "$DATABRICKS_USERNAME" \ - --password "$DATABRICKS_PASSWORD" \ - --volume "$DATABRICKS_VOLUME" \ - --catalog "$DATABRICKS_CATALOG" \ - --volume-path "$DATABRICKS_VOLUME_PATH" - -python "$SCRIPT_DIR"/python/test-databricks-volumes.py test \ - --host "$DATABRICKS_HOST" \ - --username "$DATABRICKS_USERNAME" \ - --password "$DATABRICKS_PASSWORD" \ - --volume "$DATABRICKS_VOLUME" \ - --catalog "$DATABRICKS_CATALOG" \ - --volume-path "$DATABRICKS_VOLUME_PATH" diff --git a/test_unstructured_ingest/dest/delta-table.sh b/test_unstructured_ingest/dest/delta-table.sh deleted file mode 100755 index cf54e1054..000000000 --- a/test_unstructured_ingest/dest/delta-table.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=delta-table-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -DESTINATION_TABLE=$SCRIPT_DIR/delta-table-dest -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -CI=${CI:-"false"} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup() { - cleanup_dir "$DESTINATION_TABLE" - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" -} - -trap cleanup EXIT - -# Make sure directory doesn't exist at the beginning of script as this will cause it to break -if [ -d "$DESTINATION_TABLE" ]; then - echo "cleaning up directory: $DESTINATION_TABLE" - rm -rf "$DESTINATION_TABLE" -else - echo "$DESTINATION_TABLE does not exist or is not a directory, skipping deletion" -fi - -PYTHONPATH=. 
./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - delta-table \ - --table-uri "$DESTINATION_TABLE" - -python "$SCRIPT_DIR"/python/test-ingest-delta-table-output.py --table-uri "$DESTINATION_TABLE" diff --git a/test_unstructured_ingest/dest/dropbox.sh b/test_unstructured_ingest/dest/dropbox.sh deleted file mode 100755 index 52ade6722..000000000 --- a/test_unstructured_ingest/dest/dropbox.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=dropbox-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -DESTINATION_DROPBOX="/test-output/$(uuidgen)" -CI=${CI:-"false"} - -if [ -z "$DROPBOX_APP_KEY" ] || [ -z "$DROPBOX_APP_SECRET" ] || [ -z "$DROPBOX_REFRESH_TOKEN" ]; then - echo "Skipping Dropbox ingest test because one or more of these env vars is not set:" - echo "DROPBOX_APP_KEY, DROPBOX_APP_SECRET, DROPBOX_REFRESH_TOKEN" - exit 8 -fi - -# Get a new access token from Dropbox -DROPBOX_RESPONSE=$(curl -s https://api.dropbox.com/oauth2/token -d refresh_token="$DROPBOX_REFRESH_TOKEN" -d grant_type=refresh_token -d client_id="$DROPBOX_APP_KEY" -d client_secret="$DROPBOX_APP_SECRET") -DROPBOX_ACCESS_TOKEN=$(jq -r '.access_token' <<<"$DROPBOX_RESPONSE") - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - echo "deleting test folder $DESTINATION_DROPBOX" - curl -X POST https://api.dropboxapi.com/2/files/delete_v2 \ - --header "Content-Type: application/json" \ - --header "Authorization: Bearer $DROPBOX_ACCESS_TOKEN" \ - --data "{\"path\":\"$DESTINATION_DROPBOX\"}" | jq -} -trap cleanup EXIT - -# Create new folder for test -echo "creating temp directory in dropbox for testing: $DESTINATION_DROPBOX" -response=$(curl -X POST -s -w "\n%{http_code}" https://api.dropboxapi.com/2/files/create_folder_v2 \ - --header "Content-Type: application/json" \ - --header "Authorization: Bearer $DROPBOX_ACCESS_TOKEN" \ - --data "{\"autorename\":false,\"path\":\"$DESTINATION_DROPBOX\"}") -http_code=$(tail -n1 <<<"$response") # get the last line -content=$(sed '$ d' <<<"$response") # get all but the last line which contains the status code - -if [ "$http_code" -ge 300 ]; then - echo "Failed to create temp dir in dropbox: [$http_code] $content" - exit 1 -else - echo "$http_code:" - jq <<<"$content" -fi - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - dropbox \ - --token "$DROPBOX_ACCESS_TOKEN" \ - --remote-url "dropbox://$DESTINATION_DROPBOX" - -# Simply check the number of files uploaded -expected_num_files=1 -num_files_in_dropbox=$(curl -X POST https://api.dropboxapi.com/2/files/list_folder \ - --header "Content-Type: application/json" \ - --header "Authorization: Bearer $DROPBOX_ACCESS_TOKEN" \ - --data "{\"path\":\"$DESTINATION_DROPBOX/\"}" | jq '.entries | length') 
-if [ "$num_files_in_dropbox" -ne "$expected_num_files" ]; then - echo "Expected $expected_num_files files to be uploaded to dropbox, but found $num_files_in_dropbox files." - exit 1 -fi diff --git a/test_unstructured_ingest/dest/elasticsearch.sh b/test_unstructured_ingest/dest/elasticsearch.sh deleted file mode 100755 index c4e6c8fe2..000000000 --- a/test_unstructured_ingest/dest/elasticsearch.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=elasticsearch-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -# shellcheck disable=SC1091 -source scripts/elasticsearch-test-helpers/common/es-dest-ingest-test-creds.env -function cleanup { - # Index cleanup - echo "Stopping Elasticsearch Docker container" - docker-compose -f scripts/elasticsearch-test-helpers/common/docker-compose.yaml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -echo "Creating elasticsearch instance" -# shellcheck source=/dev/null -scripts/elasticsearch-test-helpers/destination_connector/create-elasticsearch-instance.sh -wait - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/book-war-and-peace-1225p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 200 \ - --chunk-new-after-n-chars 2500 \ - --chunk-max-characters 38000 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - elasticsearch \ - --hosts http://localhost:9200 \ - --index-name ingest-test-destination \ - --username "$ELASTIC_USER" \ - --password "$ELASTIC_PASSWORD" \ - --batch-size-bytes 15000000 \ - --num-threads "$max_processes" - -desired_count=$(cat "$WORK_DIR"/upload_stage/* | jq 'length') -desired_embeddings=$(cat "$WORK_DIR"/upload_stage/* | jq '.[0]._source.embeddings' | tr -d '\n') - -PYTHONPATH=. scripts/elasticsearch-test-helpers/destination_connector/test-ingest-elasticsearch-output.py \ - --num-elements "$desired_count" \ - --embeddings "$desired_embeddings" diff --git a/test_unstructured_ingest/dest/gcs.sh b/test_unstructured_ingest/dest/gcs.sh deleted file mode 100755 index 21571a937..000000000 --- a/test_unstructured_ingest/dest/gcs.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=gcs-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -BUCKET="utic-test-ingest-fixtures-output" -DIRECTORY=$(uuidgen) -DESTINATION_GCS="gs://$BUCKET/$DIRECTORY" -CI=${CI:-"false"} - -if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then - echo "Skipping Google Drive ingest test because the GCP_INGEST_SERVICE_KEY env var is not set." 
- exit 8 -fi - -# Create temporary service key file -GCP_INGEST_SERVICE_KEY_FILE=$(mktemp) -echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-gcs-output.py down \ - --service-account-file "$GCP_INGEST_SERVICE_KEY_FILE" \ - --bucket "$BUCKET" \ - --blob-path "$DIRECTORY" - -} - -trap cleanup EXIT - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - gcs \ - --service-account-key "$GCP_INGEST_SERVICE_KEY_FILE" \ - --remote-url "$DESTINATION_GCS" - -# Simply check the number of files uploaded -python "$SCRIPT_DIR"/python/test-gcs-output.py check \ - --expected-files 1 \ - --service-account-file "$GCP_INGEST_SERVICE_KEY_FILE" \ - --bucket "$BUCKET" \ - --blob-path "$DIRECTORY" diff --git a/test_unstructured_ingest/dest/kafka-local.sh b/test_unstructured_ingest/dest/kafka-local.sh deleted file mode 100755 index 9086687ed..000000000 --- a/test_unstructured_ingest/dest/kafka-local.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=local-kafka-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -LC_ALL=C - -# Set the variables with default values if they're not set in the environment -KAFKA_TOPIC=${KAFKA_TOPIC:-"ingest-test-$RANDOM_SUFFIX"} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - - echo "Stopping local Kafka instance" - docker-compose -f scripts/kafka-test-helpers/docker-compose.yml down --remove-orphans -v -} - -trap cleanup EXIT - -echo "Creating local Kafka instance" -# shellcheck source=/dev/null -scripts/kafka-test-helpers/create-kafka-instance.sh -wait - -PYTHONPATH=. 
./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/layout-parser-paper.pdf \ - --work-dir "$WORK_DIR" \ - --chunking-strategy basic \ - --chunk-combine-text-under-n-chars 200 \ - --chunk-new-after-n-chars 2500 \ - --chunk-max-characters 38000 \ - --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - kafka \ - --topic "$KAFKA_TOPIC" \ - --bootstrap-server "$KAFKA_BOOTSTRAP_SERVER" \ - --port 29092 \ - --confluent false - -echo "Checking for matching messages in Kafka" - -#Check the number of messages in destination topic -python "$SCRIPT_DIR"/python/test-kafka-output.py check \ - --bootstrap-server "$KAFKA_BOOTSTRAP_SERVER" \ - --topic "$KAFKA_TOPIC" \ - --confluent false \ - --port 29092 diff --git a/test_unstructured_ingest/dest/mongodb.sh b/test_unstructured_ingest/dest/mongodb.sh deleted file mode 100755 index 938af0d5f..000000000 --- a/test_unstructured_ingest/dest/mongodb.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -# shellcheck disable=SC2012 - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=mongodb-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -DESTINATION_MONGO_COLLECTION="utic-test-ingest-fixtures-output-$(uuidgen)" -CI=${CI:-"false"} - -if [ -z "$MONGODB_URI" ] && [ -z "$MONGODB_DATABASE_NAME" ]; then - echo "Skipping MongoDB destination ingest test because the MONGODB_URI and MONGODB_DATABASE_NAME env var are not set." - exit 8 -fi - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - - python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" down - -} - -trap cleanup EXIT - -# NOTE(robinson) - per pymongo docs, pymongo ships with its own version of the bson library, -# which is incompatible with the bson installed from pypi. bson is installed as part of the -# astradb dependencies. 
-# ref: https://pymongo.readthedocs.io/en/stable/installation.html -python -m pip uninstall -y bson pymongo -make install-ingest-mongodb - -python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" up - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - mongodb \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" - -python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" \ - check --expected-records 5 - -stage_file=$(ls -1 "$WORK_DIR"/upload_stage | head -n 1) -python "$SCRIPT_DIR"/python/test-ingest-mongodb.py \ - --uri "$MONGODB_URI" \ - --database "$MONGODB_DATABASE_NAME" \ - --collection "$DESTINATION_MONGO_COLLECTION" \ - check-vector \ - --output-json "$WORK_DIR"/upload_stage/"$stage_file" diff --git a/test_unstructured_ingest/dest/opensearch.sh b/test_unstructured_ingest/dest/opensearch.sh deleted file mode 100755 index 003e4f286..000000000 --- a/test_unstructured_ingest/dest/opensearch.sh +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=opensearch-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - echo "Stopping OpenSearch Docker container" - docker-compose -f scripts/opensearch-test-helpers/common/docker-compose.yaml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -echo "Creating opensearch instance" -# shellcheck source=/dev/null -scripts/opensearch-test-helpers/destination_connector/create-opensearch-instance.sh -wait - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - opensearch \ - --hosts http://localhost:9247 \ - --index-name ingest-test-destination \ - --username "admin" \ - --password "admin" \ - --use-ssl \ - --batch-size-bytes 150 \ - --num-threads "$max_processes" - -scripts/opensearch-test-helpers/destination_connector/test-ingest-opensearch-output.py diff --git a/test_unstructured_ingest/dest/pgvector.sh b/test_unstructured_ingest/dest/pgvector.sh deleted file mode 100755 index 25836cf1d..000000000 --- a/test_unstructured_ingest/dest/pgvector.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1 -OUTPUT_FOLDER_NAME=sql-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -CI=${CI:-"false"} -DATABASE_TYPE="pgvector" - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - echo "Stopping SQL DB Docker container" - docker-compose -f scripts/sql-test-helpers/docker-compose-"$DATABASE_TYPE".yaml down --remove-orphans -v - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -# Create sql instance and create `elements` class -echo "Creating SQL DB instance" -# shellcheck source=/dev/null -scripts/sql-test-helpers/create-sql-instance.sh "$DATABASE_TYPE" -wait - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - sql \ - --db-type "postgresql" \ - --username unstructured \ - --password test \ - --host localhost \ - --port 5433 \ - --database elements - -"$SCRIPT_DIR"/python/test-ingest-sql-output.py "$DATABASE_TYPE" "5433" diff --git a/test_unstructured_ingest/dest/pinecone.sh b/test_unstructured_ingest/dest/pinecone.sh deleted file mode 100755 index 45adaca83..000000000 --- a/test_unstructured_ingest/dest/pinecone.sh +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=s3-pinecone-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -writer_processes=$(((max_processes - 1) > 1 ? (max_processes - 1) : 2)) - -if [ -z "$PINECONE_API_KEY" ]; then - echo "Skipping Pinecone ingest test because PINECONE_API_KEY env var is not set." - exit 0 -fi - -RANDOM_SUFFIX=$((RANDOM % 100000 + 1)) - -# Set the variables with default values if they're not set in the environment -PINECONE_INDEX=${PINECONE_INDEX:-"ingest-test-$RANDOM_SUFFIX"} -PINECONE_HOST_POSTFIX=${PINECONE_HOST_POSTFIX:-"4627-b74a"} -PINECONE_ENVIRONMENT=${PINECONE_ENVIRONMENT:-"us-east1-gcp"} -PINECONE_PROJECT_ID=${PINECONE_PROJECT_ID:-"art8iaj"} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - - # Get response code to check if index exists - response_code=$(curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --request GET \ - --url "https://api.pinecone.io/indexes/$PINECONE_INDEX" \ - --header 'accept: application/json' \ - --header "Api-Key: $PINECONE_API_KEY") - - # Cleanup (delete) index if it exists - if [ "$response_code" == "200" ]; then - echo "" - echo "deleting index $PINECONE_INDEX" - curl --request DELETE \ - "https://api.pinecone.io/indexes/$PINECONE_INDEX" \ - --header "Api-Key: $PINECONE_API_KEY" \ - --header 'content-type: application/json' - - else - echo "There was an error during index deletion for index $PINECONE_INDEX, with response code: $response_code. It might be that index $PINECONE_INDEX does not exist, so there is nothing to delete." 
- fi - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" -} - -trap cleanup EXIT - -echo "Creating index $PINECONE_INDEX" -response_code=$(curl \ - -s -o /dev/null \ - -w "%{http_code}" \ - --request POST \ - --url "https://api.pinecone.io/indexes" \ - --header "accept: application/json" \ - --header "content-type: application/json" \ - --header "Api-Key: $PINECONE_API_KEY" \ - --data ' -{ - "name": "'"$PINECONE_INDEX"'", - "dimension": 384, - "metric": "cosine", - "spec": { - "serverless": { - "cloud": "aws", - "region": "us-east-1" - } - } -} -') - -if [ "$response_code" -lt 400 ]; then - echo "Index creation success: $response_code" -else - echo "Index creation failure: $response_code" - exit 1 -fi - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/book-war-and-peace-1225p.txt \ - --work-dir "$WORK_DIR" \ - --chunking-strategy by_title \ - --chunk-combine-text-under-n-chars 150 --chunk-new-after-n-chars 1500 --chunk-max-characters 2500 --chunk-multipage-sections \ - --embedding-provider "langchain-huggingface" \ - pinecone \ - --api-key "$PINECONE_API_KEY" \ - --index-name "$PINECONE_INDEX" \ - --environment "$PINECONE_ENVIRONMENT" \ - --batch-size 80 \ - --num-processes "$writer_processes" - -# It can take some time for the index to catch up with the content that was written, this check between 10s sleeps -# to give it that time process the writes. Will timeout after checking for a minute. -num_of_vectors_remote=0 -attempt=1 -sleep_amount=30 -while [ "$num_of_vectors_remote" -eq 0 ] && [ "$attempt" -lt 4 ]; do - echo "attempt $attempt: sleeping $sleep_amount seconds to let index finish catching up after writes" - sleep $sleep_amount - - num_of_vectors_remote=$(curl --request POST \ - -s \ - --url "https://$PINECONE_INDEX-$PINECONE_PROJECT_ID.svc.aped-$PINECONE_HOST_POSTFIX.pinecone.io/describe_index_stats" \ - --header "accept: application/json" \ - --header "content-type: application/json" \ - --header "Api-Key: $PINECONE_API_KEY" | jq -r '.totalVectorCount') - - echo "vector count in Pinecone: $num_of_vectors_remote" - attempt=$((attempt + 1)) -done - -EXPECTED=1835 - -if [ "$num_of_vectors_remote" -ne $EXPECTED ]; then - echo "Number of vectors in Pinecone are $num_of_vectors_remote when the expected number is $EXPECTED. Test failed." - exit 1 -fi diff --git a/test_unstructured_ingest/dest/qdrant.sh b/test_unstructured_ingest/dest/qdrant.sh deleted file mode 100755 index ec9cf7cee..000000000 --- a/test_unstructured_ingest/dest/qdrant.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/bash - -set -ex - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=qdrant-dest -OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} -writer_processes=$(((max_processes - 1) > 1 ? 
(max_processes - 1) : 2))
-CONTAINER_NAME="qdrant_test"
-QDRANT_PORT=6333
-QDRANT_HOST=localhost:$QDRANT_PORT
-COLLECTION_NAME="qdrant-test-$(date +%s)"
-EXPECTED_POINTS_COUNT=1387
-RETRIES=5
-
-function stop_docker() {
-  docker stop $CONTAINER_NAME
-}
-
-docker run -d --rm \
-  -p 6333:$QDRANT_PORT \
-  --name $CONTAINER_NAME qdrant/qdrant:latest
-
-trap stop_docker SIGINT
-trap stop_docker ERR
-
-until curl --output /dev/null --silent --get --fail http://$QDRANT_HOST/collections; do
-  RETRIES=$((RETRIES - 1))
-  if [ "$RETRIES" -le 0 ]; then
-    echo "Qdrant server failed to start"
-    stop_docker
-    exit 1
-  fi
-  printf 'Waiting for Qdrant server to start...'
-  sleep 5
-done
-
-curl -X PUT \
-  http://$QDRANT_HOST/collections/"$COLLECTION_NAME" \
-  -H 'Content-Type: application/json' \
-  -d '{
-    "vectors": {
-      "size": 384,
-      "distance": "Cosine"
-    }
-}'
-
-EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER:-"langchain-huggingface"}
-
-PYTHONPATH=. ./unstructured/ingest/main.py \
-  local \
-  --num-processes "$max_processes" \
-  --output-dir "$OUTPUT_DIR" \
-  --strategy fast \
-  --verbose \
-  --reprocess \
-  --input-path example-docs/book-war-and-peace-1225p.txt \
-  --work-dir "$WORK_DIR" \
-  --chunking-strategy by_title \
-  --chunk-combine-text-under-n-chars 200 --chunk-new-after-n-chars 2500 --chunk-max-characters 38000 --chunk-multipage-sections \
-  --embedding-provider "langchain-huggingface" \
-  qdrant \
-  --collection-name "$COLLECTION_NAME" \
-  --location "http://"$QDRANT_HOST \
-  --batch-size 80 \
-  --num-processes "$writer_processes"
-
-response=$(curl -s -X POST \
-  $QDRANT_HOST/collections/"$COLLECTION_NAME"/points/count \
-  -H 'Content-Type: application/json' \
-  -d '{
-    "exact": true
-}')
-
-count=$(echo "$response" | jq -r '.result.count')
-
-if [ "$count" -ne $EXPECTED_POINTS_COUNT ]; then
-  echo "Points count assertion failed. Expected: $EXPECTED_POINTS_COUNT. Got: $count. Test failed."
-  stop_docker
-  exit 1
-fi
-
-stop_docker
diff --git a/test_unstructured_ingest/dest/s3.sh b/test_unstructured_ingest/dest/s3.sh
deleted file mode 100755
index b8d0b901e..000000000
--- a/test_unstructured_ingest/dest/s3.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-DEST_PATH=$(dirname "$(realpath "$0")")
-SCRIPT_DIR=$(dirname "$DEST_PATH")
-cd "$SCRIPT_DIR"/.. || exit 1
-OUTPUT_FOLDER_NAME=s3-dest
-OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR}
-WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME
-max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
-DESTINATION_S3="s3://utic-ingest-test-fixtures/destination/$(uuidgen)/"
-CI=${CI:-"false"}
-
-if [ -z "$S3_INGEST_TEST_ACCESS_KEY" ] || [ -z "$S3_INGEST_TEST_SECRET_KEY" ]; then
-  echo "Skipping S3 ingest test because S3_INGEST_TEST_ACCESS_KEY or S3_INGEST_TEST_SECRET_KEY env var is not set."
- exit 8 -fi - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup() { - cleanup_dir "$WORK_DIR" - - if AWS_ACCESS_KEY_ID="$S3_INGEST_TEST_ACCESS_KEY" AWS_SECRET_ACCESS_KEY="$S3_INGEST_TEST_SECRET_KEY" aws s3 ls "$DESTINATION_S3" --region us-east-2; then - echo "deleting destination s3 location: $DESTINATION_S3" - AWS_ACCESS_KEY_ID="$S3_INGEST_TEST_ACCESS_KEY" AWS_SECRET_ACCESS_KEY="$S3_INGEST_TEST_SECRET_KEY" aws s3 rm "$DESTINATION_S3" --recursive --region us-east-2 - fi - -} -trap cleanup EXIT - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - local \ - --num-processes "$max_processes" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - s3 \ - --key "$S3_INGEST_TEST_ACCESS_KEY" \ - --secret "$S3_INGEST_TEST_SECRET_KEY" \ - --remote-url "$DESTINATION_S3" - -# Simply check the number of files uploaded -expected_num_files=1 -num_files_in_s3=$(AWS_ACCESS_KEY_ID="$S3_INGEST_TEST_ACCESS_KEY" AWS_SECRET_ACCESS_KEY="$S3_INGEST_TEST_SECRET_KEY" aws s3 ls "${DESTINATION_S3}" --region us-east-2 | grep -c "\.json$") -if [ "$num_files_in_s3" -ne "$expected_num_files" ]; then - echo "Expected $expected_num_files files to be uploaded to s3, but found $num_files_in_s3 files." - exit 1 -else - echo "Expected number of files found: $num_files_in_s3/$expected_num_files" -fi diff --git a/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh b/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh deleted file mode 100755 index 5c222a459..000000000 --- a/test_unstructured_ingest/dest/sharepoint-embed-cog-index.sh +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=sharepoint-azure-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -DOWNLOAD_DIR=$SCRIPT_DIR/download/$OUTPUT_FOLDER_NAME -DESTINATION_INDEX="utic-test-ingest-fixtures-output-$(uuidgen)" -# The vector configs on the schema currently only exist on versions: -# 2023-07-01-Preview, 2021-04-30-Preview, 2020-06-30-Preview -API_VERSION=2023-07-01-Preview -CI=${CI:-"false"} - -if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then - echo "Skipping Sharepoint ingest test because the SHAREPOINT_CLIENT_ID or SHAREPOINT_CRED env var is not set." - exit 8 -fi - -if [ -z "$SHAREPOINT_PERMISSIONS_APP_ID" ] || [ -z "$SHAREPOINT_PERMISSIONS_APP_CRED" ] || [ -z "$SHAREPOINT_PERMISSIONS_TENANT" ]; then - echo "Skipping Sharepoint ingest test because the SHAREPOINT_PERMISSIONS_APP_ID, SHAREPOINT_PERMISSIONS_APP_CRED, or SHAREPOINT_PERMISSIONS_TENANT env var is not set." - exit 8 -fi - -if [ -z "$OPENAI_API_KEY" ]; then - echo "Skipping Sharepoint embedding ingest test because the OPENAI_API_KEY env var is not set." - exit 8 -fi - -if [ -z "$AZURE_SEARCH_ENDPOINT" ] && [ -z "$AZURE_SEARCH_API_KEY" ]; then - echo "Skipping Sharepoint Azure Cognitive Search ingest test because neither AZURE_SEARCH_ENDPOINT nor AZURE_SEARCH_API_KEY env vars are set." 
- exit 8 -fi - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh - -function cleanup { - response_code=$(curl -s -o /dev/null -w "%{http_code}" \ - "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json') - if [ "$response_code" == "200" ]; then - echo "deleting index $DESTINATION_INDEX" - curl -X DELETE \ - "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' - else - echo "Index $DESTINATION_INDEX does not exist, nothing to delete" - fi - - cleanup_dir "$OUTPUT_DIR" - cleanup_dir "$WORK_DIR" - if [ "$CI" == "true" ]; then - cleanup_dir "$DOWNLOAD_DIR" - fi -} - -trap cleanup EXIT - -# Create index -echo "Creating index $DESTINATION_INDEX" -response_code=$(curl -s -o /dev/null -w "%{http_code}" -X PUT \ - "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' \ - --data "@$SCRIPT_DIR/files/azure_cognitive_index_schema.json") - -if [ "$response_code" -lt 400 ]; then - echo "Index creation success: $response_code" -else - echo "Index creation failure: $response_code" - exit 1 -fi - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ - sharepoint \ - --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ - --num-processes 2 \ - --strategy hi_res \ - --preserve-downloads \ - --reprocess \ - --output-dir "$OUTPUT_DIR" \ - --verbose \ - --client-cred "$SHAREPOINT_CRED" \ - --client-id "$SHAREPOINT_CLIENT_ID" \ - --site "$SHAREPOINT_SITE" \ - --permissions-application-id "$SHAREPOINT_PERMISSIONS_APP_ID" \ - --permissions-client-cred "$SHAREPOINT_PERMISSIONS_APP_CRED" \ - --permissions-tenant "$SHAREPOINT_PERMISSIONS_TENANT" \ - --path "Shared Documents" \ - --recursive \ - --embedding-provider "langchain-huggingface" \ - --chunking-strategy by_title \ - --chunk-multipage-sections \ - --work-dir "$WORK_DIR" \ - azure-cognitive-search \ - --key "$AZURE_SEARCH_API_KEY" \ - --endpoint "$AZURE_SEARCH_ENDPOINT" \ - --index "$DESTINATION_INDEX" - -# It can take some time for the index to catch up with the content that was written, this check between 10s sleeps -# to give it that time process the writes. Will timeout after checking for a minute. 
-docs_count_remote=0 -attempt=1 -while [ "$docs_count_remote" -eq 0 ] && [ "$attempt" -lt 6 ]; do - echo "attempt $attempt: sleeping 10 seconds to let index finish catching up after writes" - sleep 10 - - # Check the contents of the index - docs_count_remote=$(curl "https://utic-test-ingest-fixtures.search.windows.net/indexes/$DESTINATION_INDEX/docs/\$count?api-version=$API_VERSION" \ - --header "api-key: $AZURE_SEARCH_API_KEY" \ - --header 'content-type: application/json' | jq) - - echo "docs count pulled from Azure: $docs_count_remote" - - attempt=$((attempt + 1)) -done - -docs_count_local=0 -for i in $(jq length "$OUTPUT_DIR"/**/*.json); do - docs_count_local=$((docs_count_local + i)) -done - -if [ "$docs_count_remote" -ne "$docs_count_local" ]; then - echo "Number of docs $docs_count_remote doesn't match the expected docs: $docs_count_local" - exit 1 -fi diff --git a/test_unstructured_ingest/dest/singlestore.sh b/test_unstructured_ingest/dest/singlestore.sh deleted file mode 100755 index a04f81370..000000000 --- a/test_unstructured_ingest/dest/singlestore.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=singlestore-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - echo "Stopping Singlestore Docker container" - docker compose -f scripts/singlestore-test-helpers/docker-compose.yml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - -} - -trap cleanup EXIT - -# Create singlestore instance and create `elements` class -echo "Creating singlestore instance" -# shellcheck source=/dev/null -docker compose -f scripts/singlestore-test-helpers/docker-compose.yml up -d --wait-timeout 60 - -DATABASE=ingest_test -USER=root -HOST=localhost -PASSWORD=password -PORT=3306 -TABLE=elements - -PYTHONPATH=. ./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - singlestore \ - --host $HOST \ - --user $USER \ - --password $PASSWORD \ - --database $DATABASE \ - --port $PORT \ - --table-name $TABLE \ - --drop-empty-cols - -expected_num_elements=$(cat "$WORK_DIR"/embed/* | jq 'length') -./scripts/singlestore-test-helpers/test_outputs.py \ - --table-name $TABLE \ - --database $DATABASE \ - --num-elements "$expected_num_elements" diff --git a/test_unstructured_ingest/dest/sqlite.sh b/test_unstructured_ingest/dest/sqlite.sh deleted file mode 100755 index 9cd54b35e..000000000 --- a/test_unstructured_ingest/dest/sqlite.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash - -set -e - -SRC_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$SRC_PATH") -cd "$SCRIPT_DIR"/.. 
|| exit 1
-OUTPUT_FOLDER_NAME=sql-dest
-OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR}
-OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME
-WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME
-max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
-CI=${CI:-"false"}
-DATABASE_TYPE="sqlite"
-DB_PATH=$SCRIPT_DIR/elements.db
-
-# shellcheck disable=SC1091
-source "$SCRIPT_DIR"/cleanup.sh
-function cleanup {
-  # Local file cleanup
-  cleanup_dir "$WORK_DIR"
-  cleanup_dir "$OUTPUT_DIR"
-  rm -rf "$DB_PATH"
-  if [ "$CI" == "true" ]; then
-    cleanup_dir "$DOWNLOAD_DIR"
-
-  fi
-}
-
-trap cleanup EXIT
-
-# Create sql instance and create `elements` class
-echo "Creating SQL DB instance"
-# shellcheck source=/dev/null
-scripts/sql-test-helpers/create-sql-instance.sh "$DATABASE_TYPE" "$DB_PATH"
-wait
-
-PYTHONPATH=. ./unstructured/ingest/main.py \
-  local \
-  --num-processes "$max_processes" \
-  --output-dir "$OUTPUT_DIR" \
-  --strategy fast \
-  --verbose \
-  --reprocess \
-  --input-path example-docs/pdf/fake-memo.pdf \
-  --work-dir "$WORK_DIR" \
-  sql \
-  --db-type "$DATABASE_TYPE" \
-  --username unstructured \
-  --database "$DB_PATH"
-
-"$SCRIPT_DIR"/python/test-ingest-sql-output.py "$DATABASE_TYPE" "$DB_PATH"
diff --git a/test_unstructured_ingest/dest/vectara.sh b/test_unstructured_ingest/dest/vectara.sh
deleted file mode 100755
index 0ba223d44..000000000
--- a/test_unstructured_ingest/dest/vectara.sh
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-DEST_PATH=$(dirname "$(realpath "$0")")
-SCRIPT_DIR=$(dirname "$DEST_PATH")
-cd "$SCRIPT_DIR"/.. || exit 1
-OUTPUT_FOLDER_NAME=local-vectara-dest
-OUTPUT_DIR=$SCRIPT_DIR/structured-output/$OUTPUT_FOLDER_NAME
-WORK_DIR=$SCRIPT_DIR/workdir/$OUTPUT_FOLDER_NAME
-max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")}
-
-RANDOM_SUFFIX=$((RANDOM % 100000 + 1))
-CORPUS_NAME="test-corpus-vectara-"$RANDOM_SUFFIX
-
-# Expected size of the uploaded document
-EXPECTED_CORPUS_SIZE=8843308
-
-if [ -z "$VECTARA_OAUTH_CLIENT_ID" ] || [ -z "$VECTARA_OAUTH_SECRET" ] || [ -z "$VECTARA_CUSTOMER_ID" ]; then
-  echo "Skipping VECTARA ingest test because VECTARA_OAUTH_CLIENT_ID, VECTARA_OAUTH_SECRET, or VECTARA_CUSTOMER_ID env var is not set."
-  exit 8
-fi
-
-# shellcheck disable=SC1091
-source "$SCRIPT_DIR"/cleanup.sh
-function cleanup {
-  echo "Deleting corpus $corpus_id ($CORPUS_NAME)"
-  curl -sS -L -X POST 'https://api.vectara.io/v1/delete-corpus' \
-    -H 'Content-Type: application/json' \
-    -H 'Accept: application/json' \
-    -H "Authorization: Bearer $access_token" \
-    -H "customer-id: $VECTARA_CUSTOMER_ID" \
-    --data-raw "{
-      \"corpusId\": $corpus_id
-    }"
-
-  # Local file cleanup
-  cleanup_dir "$WORK_DIR"
-  cleanup_dir "$OUTPUT_DIR"
-}
-
-trap cleanup EXIT
-
-PYTHONPATH=. 
./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/book-war-and-peace-1225p.txt \ - --work-dir "$WORK_DIR" \ - vectara \ - --customer-id "$VECTARA_CUSTOMER_ID" \ - --oauth-client-id "$VECTARA_OAUTH_CLIENT_ID" \ - --oauth-secret "$VECTARA_OAUTH_SECRET" \ - --corpus-name "$CORPUS_NAME" - -# Get JWT token -jwt_token_resp=$(curl -sS -XPOST -H "Content-type: application/x-www-form-urlencoded" -d \ - "grant_type=client_credentials&client_id=$VECTARA_OAUTH_CLIENT_ID&client_secret=$VECTARA_OAUTH_SECRET" \ - "https://vectara-prod-$VECTARA_CUSTOMER_ID.auth.us-west-2.amazoncognito.com/oauth2/token") -access_token=$(echo "$jwt_token_resp" | jq -r '.access_token') - -# Get corpus ID from name -corpora_resp=$(curl -sS -L -X POST 'https://api.vectara.io/v1/list-corpora' \ - -H 'Content-Type: application/json' \ - -H 'Accept: application/json' \ - -H "customer-id: $VECTARA_CUSTOMER_ID" \ - -H "Authorization: Bearer $access_token" \ - --data-raw "{ - \"numResults\": 100, - \"filter\": \"$CORPUS_NAME\" - }") -corpus_id=$(echo "$corpora_resp" | jq -r '.corpus[0].id') - -# Check that the size of the corpus is as expected -get_corpus_size=$(curl -L -X POST 'https://api.vectara.io/v1/compute-corpus-size' \ - -H 'Content-Type: application/json' \ - -H 'Accept: application/json' \ - -H "customer-id: $VECTARA_CUSTOMER_ID" \ - -H "Authorization: Bearer $access_token" \ - --data-raw "{ - \"corpusId\": $corpus_id -}") -corpus_size=$(echo "$get_corpus_size" | jq -r '.size.size') - -if [ "$corpus_size" == "$EXPECTED_CORPUS_SIZE" ]; then - echo "Corpus size is as expected: $corpus_size" -else - echo "Corpus size is not as expected: $corpus_size" - echo "vs $EXPECTED_CORPUS_SIZE" - exit 1 -fi diff --git a/test_unstructured_ingest/dest/weaviate.sh b/test_unstructured_ingest/dest/weaviate.sh deleted file mode 100755 index 7dfa3281a..000000000 --- a/test_unstructured_ingest/dest/weaviate.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash - -set -e - -DEST_PATH=$(dirname "$(realpath "$0")") -SCRIPT_DIR=$(dirname "$DEST_PATH") -cd "$SCRIPT_DIR"/.. || exit 1 -OUTPUT_FOLDER_NAME=weaviate-dest -OUTPUT_ROOT=${OUTPUT_ROOT:-$SCRIPT_DIR} -OUTPUT_DIR=$OUTPUT_ROOT/structured-output/$OUTPUT_FOLDER_NAME -WORK_DIR=$OUTPUT_ROOT/workdir/$OUTPUT_FOLDER_NAME -CI=${CI:-"false"} -max_processes=${MAX_PROCESSES:=$(python3 -c "import os; print(os.cpu_count())")} - -# shellcheck disable=SC1091 -source "$SCRIPT_DIR"/cleanup.sh -function cleanup { - # Index cleanup - echo "Stopping Weaviate Docker container" - docker-compose -f scripts/weaviate-test-helpers/docker-compose.yml down --remove-orphans -v - - # Local file cleanup - cleanup_dir "$WORK_DIR" - cleanup_dir "$OUTPUT_DIR" - -} - -trap cleanup EXIT - -# Create weaviate instance and create `elements` class -echo "Creating weaviate instance" -# shellcheck source=/dev/null -scripts/weaviate-test-helpers/create-weaviate-instance.sh -wait - -PYTHONPATH=. 
./unstructured/ingest/main.py \ - local \ - --num-processes "$max_processes" \ - --output-dir "$OUTPUT_DIR" \ - --strategy fast \ - --verbose \ - --reprocess \ - --input-path example-docs/pdf/fake-memo.pdf \ - --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" \ - weaviate \ - --host-url http://localhost:8080 \ - --class-name elements \ - --anonymous - -"$SCRIPT_DIR"/python/test-ingest-weaviate-output.py diff --git a/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json index 92d6daaa1..06e6a9009 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf.json @@ -11,7 +11,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -33,7 +33,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -55,7 +55,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -77,7 +77,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -99,7 +99,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -121,7 +121,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -143,7 +143,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -165,7 +165,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -187,7 +187,7 @@ "page_number": 1, "data_source": { "url": 
"abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -209,7 +209,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -231,7 +231,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -253,7 +253,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -275,7 +275,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -319,7 +319,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -341,7 +341,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -363,7 +363,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -385,7 +385,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -407,7 +407,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -429,7 +429,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": 
"0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -473,7 +473,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -495,7 +495,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -517,7 +517,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -539,7 +539,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -561,7 +561,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/Core-Skills-for-Biomedical-Data-Scientists-2-pages.pdf", - "version": "237960874052008560436652606947751982249", + "version": "0x8DB214A673DD8D8", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json index 3cad0fd85..cca8a4dd1 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.pdf.json @@ -11,7 +11,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -33,7 +33,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -55,7 +55,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -77,7 +77,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -99,7 +99,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -121,7 +121,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -143,7 +143,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -165,7 +165,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -187,7 +187,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -209,7 +209,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -231,7 +231,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -253,7 +253,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -275,7 +275,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -319,7 +319,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -341,7 +341,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -363,7 +363,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -385,7 +385,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -407,7 +407,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -429,7 +429,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -473,7 +473,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -495,7 +495,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -517,7 +517,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -539,7 +539,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -561,7 +561,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -583,7 +583,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -605,7 +605,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -627,7 +627,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -649,7 +649,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -671,7 +671,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -693,7 +693,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -715,7 +715,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -737,7 +737,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -759,7 +759,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -781,7 +781,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -803,7 +803,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -825,7 +825,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -847,7 +847,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -869,7 +869,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -891,7 +891,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -913,7 +913,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -935,7 +935,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -957,7 +957,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": 
"337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -979,7 +979,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1001,7 +1001,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1023,7 +1023,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1045,7 +1045,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1067,7 +1067,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1089,7 +1089,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1111,7 +1111,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1133,7 +1133,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1155,7 +1155,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1177,7 +1177,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1199,7 +1199,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1221,7 +1221,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1243,7 +1243,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - 
"version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1265,7 +1265,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1287,7 +1287,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1309,7 +1309,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1331,7 +1331,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1353,7 +1353,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1375,7 +1375,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1397,7 +1397,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1419,7 +1419,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1441,7 +1441,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1463,7 +1463,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1485,7 +1485,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1507,7 +1507,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1529,7 +1529,7 @@ "page_number": 2, "data_source": { "url": 
"abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1551,7 +1551,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1573,7 +1573,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1595,7 +1595,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1617,7 +1617,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1639,7 +1639,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1661,7 +1661,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1683,7 +1683,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1705,7 +1705,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1727,7 +1727,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1749,7 +1749,7 @@ "page_number": 2, "data_source": { "url": "abfs://container1/IRS-form-1987.pdf", - "version": "337148261958285544336683139132069637358", + "version": "0x8DB214AEE092B1E", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json index 4c72f31bb..870978812 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json +++ b/test_unstructured_ingest/expected-structured-output/azure/IRS-form-1987.png.json @@ -11,7 +11,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { 
"protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -33,7 +33,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -55,7 +55,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -77,7 +77,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -99,7 +99,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -121,7 +121,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -143,7 +143,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -165,7 +165,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -187,7 +187,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -209,7 +209,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -231,7 +231,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -253,7 +253,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -275,7 +275,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" @@ -319,7 +319,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -341,7 +341,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -363,7 +363,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -385,7 +385,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -407,7 +407,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -429,7 +429,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -473,7 +473,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -495,7 +495,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -517,7 +517,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -539,7 +539,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -561,7 +561,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -583,7 +583,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" @@ -605,7 +605,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -627,7 +627,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -649,7 +649,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -671,7 +671,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -693,7 +693,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -715,7 +715,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -737,7 +737,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -759,7 +759,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -781,7 +781,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -803,7 +803,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -825,7 +825,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -847,7 +847,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -869,7 +869,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" @@ -891,7 +891,7 @@ "page_number": 1, "data_source": { "url": "abfs://container1/IRS-form-1987.png", - "version": "178514357676599756686300559820761454543", + "version": "0x8DB214C1B270B0D", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json b/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json index 0cd30210b..91374854e 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json +++ b/test_unstructured_ingest/expected-structured-output/azure/rfc854.txt.json @@ -10,7 +10,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -31,7 +31,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -52,7 +52,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -73,7 +73,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -94,7 +94,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -115,7 +115,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -136,7 +136,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -157,7 +157,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -178,7 +178,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -199,7 +199,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -220,7 +220,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -241,7 +241,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -262,7 +262,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -283,7 +283,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -304,7 +304,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -325,7 +325,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -346,7 +346,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -367,7 +367,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -388,7 +388,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -409,7 +409,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -430,7 +430,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -451,7 +451,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -472,7 +472,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -493,7 +493,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -514,7 +514,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -535,7 +535,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -556,7 +556,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -577,7 +577,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -598,7 +598,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -619,7 +619,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -640,7 +640,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -661,7 +661,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -682,7 +682,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -703,7 +703,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -724,7 +724,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -745,7 +745,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -766,7 +766,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -787,7 +787,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -808,7 +808,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -829,7 +829,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -850,7 +850,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -871,7 +871,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -892,7 +892,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -913,7 +913,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -934,7 +934,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -955,7 +955,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -976,7 +976,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -997,7 +997,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1018,7 +1018,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1039,7 +1039,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + 
"version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1060,7 +1060,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1081,7 +1081,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1102,7 +1102,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1123,7 +1123,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1144,7 +1144,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1165,7 +1165,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1186,7 +1186,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1207,7 +1207,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1228,7 +1228,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1249,7 +1249,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1270,7 +1270,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1291,7 +1291,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1312,7 +1312,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": 
"252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1333,7 +1333,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1354,7 +1354,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1375,7 +1375,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1396,7 +1396,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1417,7 +1417,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1438,7 +1438,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1459,7 +1459,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1480,7 +1480,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1501,7 +1501,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1522,7 +1522,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1543,7 +1543,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1564,7 +1564,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1585,7 +1585,7 @@ "filetype": "text/plain", "data_source": { "url": 
"abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1606,7 +1606,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1627,7 +1627,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1648,7 +1648,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1669,7 +1669,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1690,7 +1690,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1711,7 +1711,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1732,7 +1732,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1753,7 +1753,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1774,7 +1774,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1795,7 +1795,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1816,7 +1816,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1837,7 +1837,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1858,7 +1858,7 @@ "filetype": "text/plain", 
"data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1879,7 +1879,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1900,7 +1900,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1921,7 +1921,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1942,7 +1942,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1963,7 +1963,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1984,7 +1984,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2005,7 +2005,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2026,7 +2026,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2047,7 +2047,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2068,7 +2068,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2089,7 +2089,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2110,7 +2110,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2131,7 +2131,7 @@ 
"filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2152,7 +2152,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2173,7 +2173,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2194,7 +2194,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2215,7 +2215,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2236,7 +2236,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2257,7 +2257,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2278,7 +2278,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2299,7 +2299,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2320,7 +2320,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2341,7 +2341,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2362,7 +2362,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2383,7 +2383,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" 
@@ -2404,7 +2404,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2425,7 +2425,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2446,7 +2446,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2467,7 +2467,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2488,7 +2488,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2509,7 +2509,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2530,7 +2530,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2551,7 +2551,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2572,7 +2572,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2593,7 +2593,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2614,7 +2614,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2635,7 +2635,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2656,7 +2656,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": 
"abfs://container1/" @@ -2677,7 +2677,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2698,7 +2698,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2719,7 +2719,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2740,7 +2740,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2761,7 +2761,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2782,7 +2782,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2803,7 +2803,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2824,7 +2824,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2845,7 +2845,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2866,7 +2866,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2887,7 +2887,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2908,7 +2908,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2929,7 +2929,7 @@ "filetype": "text/plain", "data_source": { "url": "abfs://container1/rfc854.txt", - "version": "252402046838802114392575683859882596254", + "version": "0x8DB214DA15CE591", "record_locator": { "protocol": "abfs", 
"remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json b/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json index 387857ab5..e62bb1938 100644 --- a/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json +++ b/test_unstructured_ingest/expected-structured-output/azure/spring-weather.html.json @@ -10,7 +10,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -31,7 +31,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -52,7 +52,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -81,7 +81,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -108,7 +108,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -135,7 +135,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -162,7 +162,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -189,7 +189,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -216,7 +216,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -243,7 +243,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -270,7 +270,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -297,7 +297,7 @@ "filetype": 
"text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -324,7 +324,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -351,7 +351,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -378,7 +378,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -405,7 +405,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -432,7 +432,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -459,7 +459,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -486,7 +486,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -513,7 +513,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -540,7 +540,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -567,7 +567,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -594,7 +594,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -621,7 +621,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", 
"record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -648,7 +648,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -675,7 +675,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -702,7 +702,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -729,7 +729,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -756,7 +756,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -783,7 +783,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -810,7 +810,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -837,7 +837,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -864,7 +864,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -891,7 +891,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -918,7 +918,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -945,7 +945,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -972,7 +972,7 @@ "filetype": "text/html", "data_source": { "url": 
"abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -999,7 +999,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1026,7 +1026,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1053,7 +1053,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1080,7 +1080,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1107,7 +1107,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1134,7 +1134,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1161,7 +1161,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1188,7 +1188,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1215,7 +1215,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1242,7 +1242,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1269,7 +1269,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1296,7 +1296,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { 
"protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1323,7 +1323,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1350,7 +1350,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1377,7 +1377,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1404,7 +1404,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1431,7 +1431,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1458,7 +1458,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1485,7 +1485,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1512,7 +1512,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1539,7 +1539,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1566,7 +1566,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1593,7 +1593,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1620,7 +1620,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1647,7 +1647,7 @@ "filetype": "text/html", "data_source": { "url": 
"abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1674,7 +1674,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1701,7 +1701,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1728,7 +1728,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1755,7 +1755,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1782,7 +1782,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1809,7 +1809,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1836,7 +1836,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1863,7 +1863,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1890,7 +1890,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1917,7 +1917,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1944,7 +1944,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1971,7 +1971,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { 
"protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -1998,7 +1998,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2025,7 +2025,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2052,7 +2052,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2079,7 +2079,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2106,7 +2106,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2133,7 +2133,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2160,7 +2160,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2187,7 +2187,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2214,7 +2214,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2235,7 +2235,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2256,7 +2256,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2283,7 +2283,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2316,7 +2316,7 @@ "filetype": "text/html", "data_source": { "url": 
"abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2343,7 +2343,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2364,7 +2364,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2397,7 +2397,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2430,7 +2430,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" @@ -2463,7 +2463,7 @@ "filetype": "text/html", "data_source": { "url": "abfs://container1/spring-weather.html", - "version": "162215905222974206637545574128436022861", + "version": "0x8DB214B74525BB6", "record_locator": { "protocol": "abfs", "remote_file_path": "abfs://container1/" diff --git a/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json b/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json index 39646d9a7..3c7ca733b 100644 --- a/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json +++ b/test_unstructured_ingest/expected-structured-output/box/handbook-1p.docx.json @@ -11,7 +11,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -39,7 +39,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -67,7 +67,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -89,7 +89,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -111,7 +111,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 
"data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -133,7 +133,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -155,7 +155,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -177,7 +177,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -199,7 +199,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -221,7 +221,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -243,7 +243,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -265,7 +265,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -287,7 +287,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -309,7 +309,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -332,7 +332,7 @@ "filetype": 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { "url": "box:///utic-test-ingest-fixtures/handbook-1p.docx", - "version": "83125548004193369404829885052395764226", + "version": "1255888824139", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json index 1b9c8bad3..d0025fcee 100644 --- a/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/box/nested-1/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "box:///utic-test-ingest-fixtures/nested-1/ideas-page.html", - "version": "77943175838335685751163845636763163681", + "version": "1255892530552", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json index ef9902dfb..e9bc64409 100644 --- a/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/box/nested-1/nested-2/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "box:///utic-test-ingest-fixtures/nested-1/nested-2/ideas-page.html", - "version": "293680985726204769765169474511274942733", + "version": "1255884723846", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json b/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json index 23a1ddae7..2e6dbf696 100644 --- a/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json +++ b/test_unstructured_ingest/expected-structured-output/box/science-exploration-1p.pptx.json @@ -11,7 +11,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -34,7 +34,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -57,7 +57,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -80,7 +80,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": 
"309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -103,7 +103,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -126,7 +126,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -149,7 +149,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -172,7 +172,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -195,7 +195,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -218,7 +218,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -241,7 +241,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -264,7 +264,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": "box://utic-test-ingest-fixtures", @@ -287,7 +287,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { "url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx", - "version": "309546934335254463247992132065898582121", + "version": "1255894255490", "record_locator": { "protocol": "box", "remote_file_path": 
"box://utic-test-ingest-fixtures", diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json b/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json index 94e1c93f4..9e61bf43b 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/handbook-1p.docx.json @@ -10,13 +10,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -36,13 +38,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -62,13 +66,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -82,13 +88,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -102,13 +110,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -122,13 +132,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": 
"dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -142,13 +154,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -162,13 +176,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -182,13 +198,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -202,13 +220,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -222,13 +242,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -242,13 +264,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": 
"2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -262,13 +286,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -282,13 +308,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } }, @@ -303,13 +331,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "data_source": { - "url": "dropbox:///test-input/handbook-1p.docx", - "version": "134700592086487568162605251521926324397", + "url": "dropbox://test-input/handbook-1p.docx", + "version": "2ddaae143b824b304ab42bb607d0cd4a96e2d0d0a60a30025e4ce749a53a0b8e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACQ" - } + }, + "date_created": "1687394168.0", + "date_modified": "1697632567.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json index fb02cb1ff..1c500c276 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/nested-1/ideas-page.html.json @@ -10,13 +10,15 @@ ], "filetype": "text/html", "data_source": { - "url": "dropbox:///test-input/nested-1/ideas-page.html", - "version": "67356979305728150851855820427694668063", + "url": "dropbox://test-input/nested-1/ideas-page.html", + "version": "7a31fe250cc57a9733f8d50e61b9b265c53f5dd12faedf4829e559e2c3a8845e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACw" - } + }, + "date_created": "1687394194.0", + "date_modified": "1697632566.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json index 564bd2577..0fa649855 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/nested-2/ideas-page.html.json @@ -10,13 +10,15 @@ ], "filetype": "text/html", "data_source": { - "url": "dropbox:///test-input/nested-2/ideas-page.html", - "version": 
"145453788782335405288844961545898675998", + "url": "dropbox://test-input/nested-2/ideas-page.html", + "version": "7a31fe250cc57a9733f8d50e61b9b265c53f5dd12faedf4829e559e2c3a8845e", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAADQ" - } + }, + "date_created": "1687394213.0", + "date_modified": "1697632566.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json b/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json index c5a44f158..8e59883c8 100644 --- a/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json +++ b/test_unstructured_ingest/expected-structured-output/dropbox/science-exploration-1p.pptx.json @@ -10,13 +10,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -31,13 +33,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -52,13 +56,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -73,13 +79,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -94,13 +102,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": 
"82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -115,13 +125,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -136,13 +148,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -157,13 +171,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -178,13 +194,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -199,13 +217,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -220,13 +240,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": 
"dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -241,13 +263,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } }, @@ -262,13 +286,15 @@ ], "filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation", "data_source": { - "url": "dropbox:///test-input/science-exploration-1p.pptx", - "version": "26035320120182381452247268381589958225", + "url": "dropbox://test-input/science-exploration-1p.pptx", + "version": "82ebb5e422916b72fa2bd283cae3b9f41b96a9d0af59f92a8edd6e9556ca5510", "record_locator": { "protocol": "dropbox", "remote_file_path": "dropbox://test-input/", "file_id": "id:De4ZYtDd-JoAAAAAAAAACA" - } + }, + "date_created": "1687394162.0", + "date_modified": "1697632567.0" } } } diff --git a/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json index d3a3f0854..bcd7ef201 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "gs://utic-test-ingest-fixtures/ideas-page.html", - "version": "199523943725186047835150971481714294476", + "version": "CJXRtOuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json index d49564e20..8c8d34a2f 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/fake-text.txt.json @@ -10,7 +10,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -32,7 +32,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -54,7 +54,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -76,7 +76,7 @@ 
"filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -98,7 +98,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -120,7 +120,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/fake-text.txt", - "version": "180263070579038859328651626981788275889", + "version": "CKyIrMaE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json index 662caae8c..e31d5a5e0 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-1/nested/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-1/nested/ideas-page.html", - "version": "310890354306462681752199911957569001015", + "version": "CMWrx8aE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json index 7f5a3c007..22bcb125b 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/fake-text.txt.json @@ -10,7 +10,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -32,7 +32,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -54,7 +54,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -76,7 +76,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -98,7 +98,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": 
"gs://utic-test-ingest-fixtures/", @@ -120,7 +120,7 @@ "filetype": "text/plain", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/fake-text.txt", - "version": "198731266903969902154134165613731741332", + "version": "CPXPxMuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json index 4b34ff850..b318f7a12 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/nested/ideas-page.html.json @@ -11,7 +11,7 @@ "filetype": "text/html", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/nested/ideas-page.html", - "version": "113813498010717860141768546590661839404", + "version": "COXZ3MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json index c7a6b9d3b..4931718ff 100644 --- a/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json +++ b/test_unstructured_ingest/expected-structured-output/gcs/nested-2/stanley-cups.xlsx.json @@ -12,7 +12,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -37,7 +37,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -61,7 +61,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", @@ -86,7 +86,7 @@ "filetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "data_source": { "url": "gs://utic-test-ingest-fixtures/nested-2/stanley-cups.xlsx", - "version": "25646232132200560657189097157576319365", + "version": "COul9MuE0/8CEAE=", "record_locator": { "protocol": "gs", "remote_file_path": "gs://utic-test-ingest-fixtures/", diff --git a/test_unstructured_ingest/src/against-api.sh b/test_unstructured_ingest/src/against-api.sh index a4ff8f3ad..7f2d6a944 100755 --- a/test_unstructured_ingest/src/against-api.sh +++ b/test_unstructured_ingest/src/against-api.sh @@ -27,7 +27,7 @@ trap cleanup EXIT TEST_FILE_NAME=layout-parser-paper-with-table.pdf # including pdf-infer-table-structure to validate partition arguments are passed to the api -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --api-key 
"$UNS_API_KEY" \ diff --git a/test_unstructured_ingest/src/airtable-diff.sh b/test_unstructured_ingest/src/airtable-diff.sh index 3aa9bb638..3cd81eff7 100755 --- a/test_unstructured_ingest/src/airtable-diff.sh +++ b/test_unstructured_ingest/src/airtable-diff.sh @@ -35,7 +35,7 @@ if [ -z "$AIRTABLE_PERSONAL_ACCESS_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ airtable \ --download-dir "$DOWNLOAD_DIR" \ @@ -47,7 +47,6 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --reprocess \ --output-dir "$OUTPUT_DIR" \ --work-dir "$WORK_DIR" \ - --max-retry-time 10 \ --verbose "$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME diff --git a/test_unstructured_ingest/src/airtable-large.sh b/test_unstructured_ingest/src/airtable-large.sh index d15fed2b9..c0bf06fe4 100755 --- a/test_unstructured_ingest/src/airtable-large.sh +++ b/test_unstructured_ingest/src/airtable-large.sh @@ -38,7 +38,7 @@ fi # shellcheck disable=SC1091 source ./scripts/airtable-test-helpers/component_ids.sh -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ airtable \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/astradb.sh b/test_unstructured_ingest/src/astradb.sh index 9aa89c48f..1ea211a6b 100755 --- a/test_unstructured_ingest/src/astradb.sh +++ b/test_unstructured_ingest/src/astradb.sh @@ -22,7 +22,8 @@ fi COLLECTION_NAME="ingest_test_src" -PYTHONPATH=. ./unstructured/ingest/main.py \ +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} +PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ astradb \ --token "$ASTRA_DB_APPLICATION_TOKEN" \ --api-endpoint "$ASTRA_DB_API_ENDPOINT" \ diff --git a/test_unstructured_ingest/src/azure.sh b/test_unstructured_ingest/src/azure.sh index 602f2de43..6744805d6 100755 --- a/test_unstructured_ingest/src/azure.sh +++ b/test_unstructured_ingest/src/azure.sh @@ -21,11 +21,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ azure \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \ diff --git a/test_unstructured_ingest/src/biomed-api.sh b/test_unstructured_ingest/src/biomed-api.sh index 75db5294e..82b29f887 100755 --- a/test_unstructured_ingest/src/biomed-api.sh +++ b/test_unstructured_ingest/src/biomed-api.sh @@ -23,7 +23,7 @@ trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 2 $OUTPUT_FOLDER_NAME 10k -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ biomed \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/biomed-path.sh b/test_unstructured_ingest/src/biomed-path.sh index 95effb0b6..12401ed8a 100755 --- a/test_unstructured_ingest/src/biomed-path.sh +++ b/test_unstructured_ingest/src/biomed-path.sh @@ -23,7 
+23,7 @@ trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 1 $OUTPUT_FOLDER_NAME 10k -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ biomed \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/box.sh b/test_unstructured_ingest/src/box.sh index e9f2462b5..3ab2f44b4 100755 --- a/test_unstructured_ingest/src/box.sh +++ b/test_unstructured_ingest/src/box.sh @@ -38,13 +38,13 @@ if [ -z "$BOX_APP_CONFIG_PATH" ]; then echo "$BOX_APP_CONFIG" >"$BOX_APP_CONFIG_PATH" fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ box \ --download-dir "$DOWNLOAD_DIR" \ --box-app-config "$BOX_APP_CONFIG_PATH" \ --remote-url box://utic-test-ingest-fixtures \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --num-processes "$max_processes" \ --preserve-downloads \ diff --git a/test_unstructured_ingest/src/confluence-diff.sh b/test_unstructured_ingest/src/confluence-diff.sh index 5cc54f93b..dc0f71cd1 100755 --- a/test_unstructured_ingest/src/confluence-diff.sh +++ b/test_unstructured_ingest/src/confluence-diff.sh @@ -31,7 +31,7 @@ if [ -z "$CONFLUENCE_USER_EMAIL" ] || [ -z "$CONFLUENCE_API_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ confluence \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/confluence-large.sh b/test_unstructured_ingest/src/confluence-large.sh index 7b20d0ee0..790d675b9 100755 --- a/test_unstructured_ingest/src/confluence-large.sh +++ b/test_unstructured_ingest/src/confluence-large.sh @@ -37,7 +37,7 @@ fi # are being provided at the same time, which is a wrong way to use the connector. # We expect the test to ignore --confluence-num-of-spaces and use --confluence-list-of-spaces. 
-RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ confluence \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/delta-table.sh b/test_unstructured_ingest/src/delta-table.sh index 7faf23c40..d8ac97145 100755 --- a/test_unstructured_ingest/src/delta-table.sh +++ b/test_unstructured_ingest/src/delta-table.sh @@ -31,7 +31,7 @@ function cleanup() { trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ delta-table \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/discord.sh b/test_unstructured_ingest/src/discord.sh index 64bf18364..ca986e3b0 100755 --- a/test_unstructured_ingest/src/discord.sh +++ b/test_unstructured_ingest/src/discord.sh @@ -29,7 +29,7 @@ if [ -z "$DISCORD_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ discord \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/dropbox.sh b/test_unstructured_ingest/src/dropbox.sh index 414ce0846..ff2c82998 100755 --- a/test_unstructured_ingest/src/dropbox.sh +++ b/test_unstructured_ingest/src/dropbox.sh @@ -34,12 +34,12 @@ fi DROPBOX_RESPONSE=$(curl https://api.dropbox.com/oauth2/token -d refresh_token="$DROPBOX_REFRESH_TOKEN" -d grant_type=refresh_token -d client_id="$DROPBOX_APP_KEY" -d client_secret="$DROPBOX_APP_SECRET") DROPBOX_ACCESS_TOKEN=$(jq -r '.access_token' <<<"$DROPBOX_RESPONSE") -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ dropbox \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --preserve-downloads \ --reprocess \ --output-dir "$OUTPUT_DIR" \ diff --git a/test_unstructured_ingest/src/elasticsearch.sh b/test_unstructured_ingest/src/elasticsearch.sh index 1534f0018..9141cde57 100755 --- a/test_unstructured_ingest/src/elasticsearch.sh +++ b/test_unstructured_ingest/src/elasticsearch.sh @@ -37,11 +37,11 @@ trap cleanup EXIT scripts/elasticsearch-test-helpers/source_connector/create-fill-and-check-es.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ elasticsearch \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/gcs.sh b/test_unstructured_ingest/src/gcs.sh index 77d2d86c6..5261c1169 100755 --- 
a/test_unstructured_ingest/src/gcs.sh +++ b/test_unstructured_ingest/src/gcs.sh @@ -34,12 +34,12 @@ fi GCP_INGEST_SERVICE_KEY_FILE=$(mktemp) echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ gcs \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --preserve-downloads \ --reprocess \ --output-dir "$OUTPUT_DIR" \ diff --git a/test_unstructured_ingest/src/github.sh b/test_unstructured_ingest/src/github.sh index a34355333..bea75f359 100755 --- a/test_unstructured_ingest/src/github.sh +++ b/test_unstructured_ingest/src/github.sh @@ -37,7 +37,7 @@ elif [[ "$CI" == "true" ]]; then echo fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} #shellcheck disable=SC2086 PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ github \ diff --git a/test_unstructured_ingest/src/gitlab.sh b/test_unstructured_ingest/src/gitlab.sh index 64ac21353..1bd01b488 100755 --- a/test_unstructured_ingest/src/gitlab.sh +++ b/test_unstructured_ingest/src/gitlab.sh @@ -24,7 +24,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ gitlab \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/google-drive.sh b/test_unstructured_ingest/src/google-drive.sh index 36a6ab79b..7e580e8a1 100755 --- a/test_unstructured_ingest/src/google-drive.sh +++ b/test_unstructured_ingest/src/google-drive.sh @@ -35,11 +35,11 @@ fi GCP_INGEST_SERVICE_KEY_FILE=$(mktemp) echo "$GCP_INGEST_SERVICE_KEY" >"$GCP_INGEST_SERVICE_KEY_FILE" -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ google-drive \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.version \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.version \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \ @@ -47,7 +47,7 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --output-dir "$OUTPUT_DIR" \ --verbose \ --drive-id 1OQZ66OHBE30rNsNa7dweGLfRmXvkT_jr \ - --service-account-key "$GCP_INGEST_SERVICE_KEY_FILE" \ + --service-account-key-path "$GCP_INGEST_SERVICE_KEY_FILE" \ --recursive \ --extensions "pdf,docx" \ --work-dir "$WORK_DIR" diff --git a/test_unstructured_ingest/src/hubspot.sh b/test_unstructured_ingest/src/hubspot.sh index 86a75630c..d5b617569 100755 --- a/test_unstructured_ingest/src/hubspot.sh +++ b/test_unstructured_ingest/src/hubspot.sh @@ -39,7 +39,8 @@ fi # Can be used multiple 
times to specify multiple objects. # --custom-properties Custom property to process information from. Comma separated list. -PYTHONPATH=. ./unstructured/ingest/main.py \ +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} +PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ hubspot \ --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.date_created,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/jira.sh b/test_unstructured_ingest/src/jira.sh index 533fc3224..ce6b4e049 100755 --- a/test_unstructured_ingest/src/jira.sh +++ b/test_unstructured_ingest/src/jira.sh @@ -50,7 +50,7 @@ fi # Note: When any of the optional arguments are provided, connector will ingest only those components, and nothing else. # When none of the optional arguments are provided, all issues in all projects will be ingested. -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ jira \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/kafka-local.sh b/test_unstructured_ingest/src/kafka-local.sh index c2ed84d0b..36b21754f 100755 --- a/test_unstructured_ingest/src/kafka-local.sh +++ b/test_unstructured_ingest/src/kafka-local.sh @@ -57,7 +57,7 @@ python "$SCRIPT_DIR"/python/test-produce-kafka-message.py up \ --confluent false \ --port 29092 -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ kafka \ --bootstrap-server localhost \ diff --git a/test_unstructured_ingest/src/local-embed-bedrock.sh b/test_unstructured_ingest/src/local-embed-bedrock.sh index 1d23431cf..285d15a56 100755 --- a/test_unstructured_ingest/src/local-embed-bedrock.sh +++ b/test_unstructured_ingest/src/local-embed-bedrock.sh @@ -24,17 +24,17 @@ if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --verbose \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-aws-bedrock" \ + --embedding-provider "aws-bedrock" \ --embedding-aws-access-key-id "$AWS_ACCESS_KEY_ID" \ --embedding-aws-secret-access-key "$AWS_SECRET_ACCESS_KEY" diff --git a/test_unstructured_ingest/src/local-embed-mixedbreadai.sh b/test_unstructured_ingest/src/local-embed-mixedbreadai.sh index 75d949c89..99168d7dd 100755 --- a/test_unstructured_ingest/src/local-embed-mixedbreadai.sh +++ b/test_unstructured_ingest/src/local-embed-mixedbreadai.sh @@ -22,10 +22,8 @@ function cleanup() { } trap cleanup EXIT -# Define the run script -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} - 
# Run the ingestion script with the specified parameters +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-embed-octoai.sh b/test_unstructured_ingest/src/local-embed-octoai.sh index e75ee6dc5..54ff3e2a0 100755 --- a/test_unstructured_ingest/src/local-embed-octoai.sh +++ b/test_unstructured_ingest/src/local-embed-octoai.sh @@ -25,7 +25,7 @@ if [ -z "$OCTOAI_API_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-embed-vertexai.sh b/test_unstructured_ingest/src/local-embed-vertexai.sh index b7342fa75..4ef499bc5 100755 --- a/test_unstructured_ingest/src/local-embed-vertexai.sh +++ b/test_unstructured_ingest/src/local-embed-vertexai.sh @@ -25,17 +25,17 @@ if [ -z "$GCP_INGEST_SERVICE_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --verbose \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-vertexai" \ + --embedding-provider "vertexai" \ --embedding-api-key "$GCP_INGEST_SERVICE_KEY" \ --embedding-model-name "textembedding-gecko@001" diff --git a/test_unstructured_ingest/src/local-embed-voyageai.sh b/test_unstructured_ingest/src/local-embed-voyageai.sh index 62f5c60d3..c5f3be1fe 100755 --- a/test_unstructured_ingest/src/local-embed-voyageai.sh +++ b/test_unstructured_ingest/src/local-embed-voyageai.sh @@ -25,7 +25,7 @@ if [ -z "$VOYAGE_API_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ @@ -35,7 +35,7 @@ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-voyageai" \ + --embedding-provider "voyageai" \ --embedding-api-key "$VOYAGE_API_KEY" \ --embedding-model-name "voyage-large-2" diff --git a/test_unstructured_ingest/src/local-embed.sh b/test_unstructured_ingest/src/local-embed.sh index 0b8d540e3..210a7111c 100755 --- a/test_unstructured_ingest/src/local-embed.sh +++ b/test_unstructured_ingest/src/local-embed.sh @@ -19,17 +19,17 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude 
coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --verbose \ --reprocess \ --input-path example-docs/book-war-and-peace-1p.txt \ --work-dir "$WORK_DIR" \ - --embedding-provider "langchain-huggingface" + --embedding-provider "huggingface" set +e diff --git a/test_unstructured_ingest/src/local-failed-partition.sh b/test_unstructured_ingest/src/local-failed-partition.sh index dbe4f1c77..a230888b3 100755 --- a/test_unstructured_ingest/src/local-failed-partition.sh +++ b/test_unstructured_ingest/src/local-failed-partition.sh @@ -38,7 +38,7 @@ function check() { fi } -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-single-file-basic-chunking.sh b/test_unstructured_ingest/src/local-single-file-basic-chunking.sh index 7786e1c63..575bd876f 100755 --- a/test_unstructured_ingest/src/local-single-file-basic-chunking.sh +++ b/test_unstructured_ingest/src/local-single-file-basic-chunking.sh @@ -22,8 +22,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} - +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --chunking-strategy basic \ diff --git a/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh b/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh index 452686eeb..051c5fba2 100755 --- a/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh +++ b/test_unstructured_ingest/src/local-single-file-chunk-no-orig-elements.sh @@ -33,14 +33,13 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} - +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --chunking-strategy by_title \ - --chunk-no-include-orig-elements \ + --no-chunk-include-orig-elements \ --chunk-max-characters 2000 \ - --chunk-no-multipage-sections \ + --no-chunk-multipage-sections \ --input-path "$ABS_INPUT_PATH" \ --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/local-single-file-with-encoding.sh b/test_unstructured_ingest/src/local-single-file-with-encoding.sh index 016177073..3cf91223e 100755 --- a/test_unstructured_ingest/src/local-single-file-with-encoding.sh +++ b/test_unstructured_ingest/src/local-single-file-with-encoding.sh @@ -20,11 +20,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude 
filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --encoding cp1252 \ --verbose \ diff --git a/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh b/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh index 4265d0c4f..4c0ab5b36 100755 --- a/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh +++ b/test_unstructured_ingest/src/local-single-file-with-pdf-infer-table-structure.sh @@ -20,11 +20,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --skip-infer-table-types "xls,xlsx" \ --strategy hi_res \ diff --git a/test_unstructured_ingest/src/local-single-file.sh b/test_unstructured_ingest/src/local-single-file.sh index 14804f085..249746ed8 100755 --- a/test_unstructured_ingest/src/local-single-file.sh +++ b/test_unstructured_ingest/src/local-single-file.sh @@ -22,11 +22,11 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --output-dir "$OUTPUT_DIR" \ --additional-partition-args '{"strategy":"ocr_only", "languages":["ind", "est"]}' \ --verbose \ diff --git a/test_unstructured_ingest/src/local.sh b/test_unstructured_ingest/src/local.sh index deac065b5..ac725144c 100755 --- a/test_unstructured_ingest/src/local.sh +++ b/test_unstructured_ingest/src/local.sh @@ -19,7 +19,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ --num-processes "$max_processes" \ diff --git 
a/test_unstructured_ingest/src/mongodb.sh b/test_unstructured_ingest/src/mongodb.sh index 553014266..8429d7e1f 100755 --- a/test_unstructured_ingest/src/mongodb.sh +++ b/test_unstructured_ingest/src/mongodb.sh @@ -25,9 +25,10 @@ fi # astradb dependencies. # ref: https://pymongo.readthedocs.io/en/stable/installation.html python -m pip uninstall -y bson pymongo -make install-ingest-mongodb +pip install "unstructured-ingest[mongodb]" -PYTHONPATH=. ./unstructured/ingest/main.py \ +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} +PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ mongodb \ --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.date_created,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/notion.sh b/test_unstructured_ingest/src/notion.sh index 063a9199e..e80a11bfa 100755 --- a/test_unstructured_ingest/src/notion.sh +++ b/test_unstructured_ingest/src/notion.sh @@ -29,7 +29,7 @@ if [ -z "$NOTION_API_KEY" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ notion \ --metadata-exclude coordinates,filename,file_directory,metadata.last_modified,metadata.data_source.date_processed,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ diff --git a/test_unstructured_ingest/src/onedrive.sh b/test_unstructured_ingest/src/onedrive.sh index 0dfa3263a..d38b7ab80 100755 --- a/test_unstructured_ingest/src/onedrive.sh +++ b/test_unstructured_ingest/src/onedrive.sh @@ -29,11 +29,11 @@ if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_USER_PNAME" ] exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ onedrive \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --strategy hi_res \ --preserve-downloads \ diff --git a/test_unstructured_ingest/src/opensearch.sh b/test_unstructured_ingest/src/opensearch.sh index 0b0a412a3..f1d7c150e 100755 --- a/test_unstructured_ingest/src/opensearch.sh +++ b/test_unstructured_ingest/src/opensearch.sh @@ -35,11 +35,11 @@ trap cleanup EXIT scripts/opensearch-test-helpers/source_connector/create-and-check-opensearch.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ opensearch \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/outlook.sh b/test_unstructured_ingest/src/outlook.sh index 890037070..a1a5a4878 
100755 --- a/test_unstructured_ingest/src/outlook.sh +++ b/test_unstructured_ingest/src/outlook.sh @@ -29,7 +29,7 @@ if [ -z "$MS_CLIENT_ID" ] || [ -z "$MS_CLIENT_CRED" ] || [ -z "$MS_TENANT_ID" ] exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ outlook \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/pdf-fast-reprocess.sh b/test_unstructured_ingest/src/pdf-fast-reprocess.sh index a0dda9375..b27e32e8e 100755 --- a/test_unstructured_ingest/src/pdf-fast-reprocess.sh +++ b/test_unstructured_ingest/src/pdf-fast-reprocess.sh @@ -28,10 +28,10 @@ trap cleanup EXIT echo "REPROCESS INPUT PATH" ls "$INPUT_PATH" -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ local \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --strategy fast \ --reprocess \ diff --git a/test_unstructured_ingest/src/s3-compression.sh b/test_unstructured_ingest/src/s3-compression.sh index 1d1faabee..7ee066f3a 100755 --- a/test_unstructured_ingest/src/s3-compression.sh +++ b/test_unstructured_ingest/src/s3-compression.sh @@ -20,7 +20,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ s3 \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/s3-minio.sh b/test_unstructured_ingest/src/s3-minio.sh index c6011be05..85dd8f85d 100755 --- a/test_unstructured_ingest/src/s3-minio.sh +++ b/test_unstructured_ingest/src/s3-minio.sh @@ -32,13 +32,13 @@ trap cleanup EXIT scripts/minio-test-helpers/create-and-check-minio.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key \ PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ s3 \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.date_created \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.date_modified,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth,metadata.data_source.date_created \ --strategy hi_res \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/s3.sh b/test_unstructured_ingest/src/s3.sh index 61e0fe13d..bfdc72c1c 100755 --- a/test_unstructured_ingest/src/s3.sh +++ b/test_unstructured_ingest/src/s3.sh @@ -23,12 +23,12 @@ trap cleanup EXIT "$SCRIPT_DIR"/check-num-files-expected-output.sh 3 $OUTPUT_FOLDER_NAME 
20k -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ s3 \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --strategy hi_res \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/salesforce.sh b/test_unstructured_ingest/src/salesforce.sh index 8ebce46a1..54ebd0555 100755 --- a/test_unstructured_ingest/src/salesforce.sh +++ b/test_unstructured_ingest/src/salesforce.sh @@ -43,15 +43,15 @@ if [ -z "$SALESFORCE_PRIVATE_KEY_PATH" ]; then echo "$SALESFORCE_PRIVATE_KEY" >"$SALESFORCE_PRIVATE_KEY_PATH" fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ salesforce \ --categories "EmailMessage,Campaign" \ --download-dir "$DOWNLOAD_DIR" \ --username "$SALESFORCE_USERNAME" \ --consumer-key "$SALESFORCE_CONSUMER_KEY" \ - --private-key "$SALESFORCE_PRIVATE_KEY_PATH" \ - --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ + --private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \ + --metadata-exclude coordinates,filename,file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.last_modified,metadata.detection_class_prob,metadata.parent_id,metadata.category_depth \ --num-processes "$max_processes" \ --preserve-downloads \ --reprocess \ diff --git a/test_unstructured_ingest/src/sftp.sh b/test_unstructured_ingest/src/sftp.sh index 3386b3a8c..e3312224d 100755 --- a/test_unstructured_ingest/src/sftp.sh +++ b/test_unstructured_ingest/src/sftp.sh @@ -33,12 +33,12 @@ trap cleanup EXIT scripts/sftp-test-helpers/create-and-check-sftp.sh wait -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ sftp \ --num-processes "$max_processes" \ --download-dir "$DOWNLOAD_DIR" \ - --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.last_modified,metadata.data_source.version \ + --metadata-exclude file_directory,metadata.data_source.date_processed,metadata.data_source.filesize_bytes,metadata.data_source.date_created,metadata.data_source.date_modified,metadata.last_modified,metadata.data_source.version \ --preserve-downloads \ --reprocess \ --output-dir "$OUTPUT_DIR" \ diff --git a/test_unstructured_ingest/src/sharepoint-with-permissions.sh b/test_unstructured_ingest/src/sharepoint-with-permissions.sh index 1b00bdd96..cc16c1135 100755 --- a/test_unstructured_ingest/src/sharepoint-with-permissions.sh +++ b/test_unstructured_ingest/src/sharepoint-with-permissions.sh @@ -39,7 +39,7 @@ fi # excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly # excluding metadata.data_source.permissions_data since the api has deprecation warnings. 
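Two connector-specific fixups ride along with the entry-point swap: salesforce.sh adopts the renamed --private-key-path flag (formerly --private-key), and sftp.sh widens its exclusions to cover date_created, date_modified, and filesize_bytes from the data-source metadata. The updated Salesforce call, restricted to the arguments visible in this hunk:

    PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \
      salesforce \
      --categories "EmailMessage,Campaign" \
      --download-dir "$DOWNLOAD_DIR" \
      --username "$SALESFORCE_USERNAME" \
      --consumer-key "$SALESFORCE_CONSUMER_KEY" \
      --private-key-path "$SALESFORCE_PRIVATE_KEY_PATH" \
      --num-processes "$max_processes" \
      --preserve-downloads \
      --reprocess

(The remaining flags in the script are unchanged.)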
Will want to do a separate test for permissions data -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ sharepoint \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/sharepoint.sh b/test_unstructured_ingest/src/sharepoint.sh index ff5d0dd83..ea07410d2 100755 --- a/test_unstructured_ingest/src/sharepoint.sh +++ b/test_unstructured_ingest/src/sharepoint.sh @@ -31,7 +31,7 @@ if [ -z "$SHAREPOINT_CLIENT_ID" ] || [ -z "$SHAREPOINT_CRED" ]; then fi # excluding metadata.last_modified since this will always update as date processed because the Sharepoint connector creates documents on the fly -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ sharepoint \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/src/slack.sh b/test_unstructured_ingest/src/slack.sh index 6e76e0f34..503e67240 100755 --- a/test_unstructured_ingest/src/slack.sh +++ b/test_unstructured_ingest/src/slack.sh @@ -29,7 +29,7 @@ if [ -z "$SLACK_TOKEN" ]; then exit 8 fi -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ slack \ --num-processes "$max_processes" \ diff --git a/test_unstructured_ingest/src/wikipedia.sh b/test_unstructured_ingest/src/wikipedia.sh index 24f8c0855..21a55e572 100755 --- a/test_unstructured_ingest/src/wikipedia.sh +++ b/test_unstructured_ingest/src/wikipedia.sh @@ -24,7 +24,7 @@ function cleanup() { } trap cleanup EXIT -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} +RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest} PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" \ wikipedia \ --download-dir "$DOWNLOAD_DIR" \ diff --git a/test_unstructured_ingest/test-help.sh b/test_unstructured_ingest/test-help.sh deleted file mode 100755 index 9ec8a9824..000000000 --- a/test_unstructured_ingest/test-help.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash - -set -u -o pipefail -e - -RUN_SCRIPT=${RUN_SCRIPT:-./unstructured/ingest/main.py} -sources=$(PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" --help | sed -e '1,/Commands/ d' | awk '{NF=1}1') -echo "Checking all source: $sources" -for src in $sources; do - destinations=$(PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" "$src" --help | sed -e '1,/Destinations/ d' | awk '{NF=1}1') - for dest in $destinations; do - echo "Checking $src -> $dest" - PYTHONPATH=${PYTHONPATH:-.} "$RUN_SCRIPT" "$src" "$dest" --help - done -done diff --git a/test_unstructured_ingest/test-ingest-src.sh b/test_unstructured_ingest/test-ingest-src.sh index 1ebb3dc58..8634b330f 100755 --- a/test_unstructured_ingest/test-ingest-src.sh +++ b/test_unstructured_ingest/test-ingest-src.sh @@ -44,7 +44,8 @@ all_tests=( 'elasticsearch.sh' 'confluence-diff.sh' 'confluence-large.sh' - 'airtable-diff.sh' + # NOTE(christine): This test is disabled because it is triggering 404 client errors to the API + # 'airtable-diff.sh' # # NOTE(ryan): This test is disabled because it is triggering too many requests to the API # 'airtable-large.sh' 'local-single-file.sh' diff --git a/test_unstructured_ingest/unit/cli/test_cli.py b/test_unstructured_ingest/unit/cli/test_cli.py deleted file mode 100644 index b0fcf50cc..000000000 --- a/test_unstructured_ingest/unit/cli/test_cli.py +++ /dev/null @@ -1,18 +0,0 @@ -import click -import pytest - -from unstructured.ingest.cli.interfaces import CliMixin - - -def 
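The deleted test-help.sh was a smoke test that scraped every source command and destination subcommand out of the CLI's --help output and verified each pairing at least parsed; it is dropped here rather than retargeted. If the same check were wanted against the console script, a rough port of the deleted logic (assuming the new CLI keeps the same "Commands" and "Destinations" sections in its help text) would be:

    #!/usr/bin/env bash
    set -u -o pipefail -e

    RUN_SCRIPT=${RUN_SCRIPT:-unstructured-ingest}
    # list every source command exposed by the CLI
    sources=$("$RUN_SCRIPT" --help | sed -e '1,/Commands/ d' | awk '{NF=1}1')
    for src in $sources; do
      # list every destination subcommand for this source
      destinations=$("$RUN_SCRIPT" "$src" --help | sed -e '1,/Destinations/ d' | awk '{NF=1}1')
      for dest in $destinations; do
        echo "Checking $src -> $dest"
        "$RUN_SCRIPT" "$src" "$dest" --help
      done
    done

In test-ingest-src.sh, airtable-diff.sh joins the already-disabled airtable-large.sh: the former was triggering 404 client errors from the API, the latter too many requests.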
test_add_params(): - @click.command() - def sample_cmd(): - pass - - options = [ - click.Option(["--opt1"]), - click.Option(["--opt1"]), - ] - cmd = sample_cmd - with pytest.raises(ValueError): - CliMixin.add_params(cmd=cmd, params=options) diff --git a/test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py b/test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py deleted file mode 100644 index 60a14e987..000000000 --- a/test_unstructured_ingest/unit/connector/fsspec/test_connector_gcs.py +++ /dev/null @@ -1,35 +0,0 @@ -from unittest.mock import MagicMock - -import pytest - -from unstructured.ingest.connector.fsspec.gcs import GcsAccessConfig - - -@pytest.mark.parametrize( - ("given_access_token", "then_access_token"), - [ - (None, None), - ("/tmp/gcs.key", "/tmp/gcs.key"), - ("google_default", "google_default"), - ("cache", "cache"), - ("anon", "anon"), - ("browser", "browser"), - ("cloud", "cloud"), - ("{'some_key': 'some_value'}", {"some_key": "some_value"}), - ], -) -def test_validate_access_token(mocker, given_access_token, then_access_token): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = True - - when_token = GcsAccessConfig(token=given_access_token).token - assert when_token == then_access_token - - -def test_fail_validate_access_token(mocker): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = False - - given_access_token = "/tmp/gcs.key" - with pytest.raises(ValueError): - GcsAccessConfig(token=given_access_token) diff --git a/test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py b/test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py deleted file mode 100644 index edbe543dc..000000000 --- a/test_unstructured_ingest/unit/connector/fsspec/test_fsspec.py +++ /dev/null @@ -1,25 +0,0 @@ -from unittest.mock import MagicMock, patch - -from fsspec import AbstractFileSystem - -from unstructured.ingest.connector.fsspec.fsspec import FsspecIngestDoc, SimpleFsspecConfig -from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig - - -@patch("fsspec.get_filesystem_class") -def test_version_is_string(mock_get_filesystem_class): - """ - Test that the version is a string even when the filesystem checksum is an integer. 
- """ - mock_fs = MagicMock(spec=AbstractFileSystem) - mock_fs.checksum.return_value = 1234567890 - mock_fs.info.return_value = {"etag": ""} - mock_get_filesystem_class.return_value = lambda **kwargs: mock_fs - config = SimpleFsspecConfig("s3://my-bucket", access_config={}) - doc = FsspecIngestDoc( - processor_config=ProcessorConfig(), - read_config=ReadConfig(), - connector_config=config, - remote_file_path="test.txt", - ) - assert isinstance(doc.source_metadata.version, str) diff --git a/test_unstructured_ingest/unit/connector/fsspec/test_paths.py b/test_unstructured_ingest/unit/connector/fsspec/test_paths.py deleted file mode 100644 index de3648914..000000000 --- a/test_unstructured_ingest/unit/connector/fsspec/test_paths.py +++ /dev/null @@ -1,223 +0,0 @@ -from dataclasses import dataclass -from pathlib import Path - -import pytest - -from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxIngestDoc, -) -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecIngestDoc, -) -from unstructured.ingest.connector.fsspec.sftp import SftpAccessConfig, SimpleSftpConfig -from unstructured.ingest.interfaces import ( - FsspecConfig, -) - - -@dataclass -class FakeConfigDropboxRoot: - output_dir = "/fakeuser/fake_output" - dir_path = " " - download_dir = "/fakeuser/fake_download" - path_without_protocol = " " - - -@dataclass -class FakeConfigFolder: - output_dir = "/fakeuser/fake_output" - dir_path = "fake_folder" - download_dir = "/fakeuser/fake_download" - path_without_protocol = "fake_folder" - - -def test_dropbox_root_succeeds(): - """ - Test that path joining method works for Dropbox root folder. - Note slash in front of remote_file_path. - """ - dbox = DropboxIngestDoc( - connector_config=FakeConfigDropboxRoot, - read_config=FakeConfigDropboxRoot, - processor_config=FakeConfigDropboxRoot, - remote_file_path="/fake_file.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file.txt") - - -def test_dropbox_root_succeeds2(): - """ - Test that path joining method works for Dropbox root folder. - Note lack of slash in front of remote_file_path. This still works. - """ - dbox = DropboxIngestDoc( - connector_config=FakeConfigDropboxRoot, - read_config=FakeConfigDropboxRoot, - processor_config=FakeConfigDropboxRoot, - remote_file_path="fake_file.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file.txt") - - -def test_dropbox_folder_succeeds(): - """ - Test that path joining method works for Dropbox root folder. - Note no slash in front of remote_file_path. - """ - dbox = DropboxIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="fake_file2.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file2.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file2.txt") - - -def test_dropbox_folder_fails(): - """Test that path joining method gives WRONG path. Note slash in front of remote_file_path. - Path joining is sensitive. 
Note that the path is MISSING the folders.""" - dbox = DropboxIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="/fake_file2.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fake_file2.txt.json") - assert download_filename == Path("/fake_file2.txt") - - -def test_fsspec_folder_succeeds(): - """ - Test that path joining method works for root folder. - Note no slash in front of remote_file_path. - """ - dbox = FsspecIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="fake_file2.txt", - ) - output_filename = dbox._output_filename - download_filename = dbox._tmp_download_file() - - assert output_filename == Path("/fakeuser/fake_output/fake_file2.txt.json") - assert download_filename == Path("/fakeuser/fake_download/fake_file2.txt") - - -def test_fsspec_folder_fails(): - """Test that path joining method gives WRONG path. Note slash in front of remote_file_path. - Path joining is sensitive. Note that the path is MISSING the folders.""" - fstest = FsspecIngestDoc( - connector_config=FakeConfigFolder, - read_config=FakeConfigFolder, - processor_config=FakeConfigFolder, - remote_file_path="/fake_file2.txt", - ) - output_filename = fstest._output_filename - download_filename = fstest._tmp_download_file() - - assert output_filename == Path("/fake_file2.txt.json") - assert download_filename == Path("/fake_file2.txt") - - -def test_post_init_invalid_protocol(): - """Validate that an invalid protocol raises a ValueError""" - with pytest.raises(ValueError): - FsspecConfig(remote_url="ftp://example.com/path/to/file.txt") - - -def test_fsspec_path_extraction_dropbox_root(): - """Validate that the path extraction works for dropbox root""" - config = FsspecConfig(remote_url="dropbox:// /") - assert config.protocol == "dropbox" - assert config.path_without_protocol == " /" - assert config.dir_path == " " - assert config.file_path == "" - - -def test_fsspec_path_extraction_dropbox_subfolder(): - """Validate that the path extraction works for dropbox subfolder""" - config = FsspecConfig(remote_url="dropbox://path") - assert config.protocol == "dropbox" - assert config.path_without_protocol == "path" - assert config.dir_path == "path" - assert config.file_path == "" - - -def test_fsspec_path_extraction_s3_bucket_only(): - """Validate that the path extraction works for s3 bucket without filename""" - config = FsspecConfig(remote_url="s3://bucket-name") - assert config.protocol == "s3" - assert config.path_without_protocol == "bucket-name" - assert config.dir_path == "bucket-name" - assert config.file_path == "" - - -def test_fsspec_path_extraction_s3_valid_path(): - """Validate that the path extraction works for s3 bucket with filename""" - config = FsspecConfig(remote_url="s3://bucket-name/path/to/file.txt") - assert config.protocol == "s3" - assert config.path_without_protocol == "bucket-name/path/to/file.txt" - assert config.dir_path == "bucket-name" - assert config.file_path == "path/to/file.txt" - - -def test_fsspec_path_extraction_s3_invalid_path(): - """Validate that an invalid s3 path (that mimics triple slash for dropbox) - raises a ValueError""" - with pytest.raises(ValueError): - FsspecConfig(remote_url="s3:///bucket-name/path/to") - - -def test_sftp_path_extraction_post_init_with_extension(): - """Validate that the path extraction works for sftp 
with file extension""" - config = SimpleSftpConfig( - remote_url="sftp://example.com/path/to/file.txt", - access_config=SftpAccessConfig(username="username", password="password", host="", port=22), - ) - assert config.file_path == "file.txt" - assert config.dir_path == "path/to" - assert config.path_without_protocol == "path/to" - assert config.access_config.host == "example.com" - assert config.access_config.port == 22 - - -def test_sftp_path_extraction_without_extension(): - """Validate that the path extraction works for sftp without extension""" - config = SimpleSftpConfig( - remote_url="sftp://example.com/path/to/directory", - access_config=SftpAccessConfig(username="username", password="password", host="", port=22), - ) - assert config.file_path == "" - assert config.dir_path == "path/to/directory" - assert config.path_without_protocol == "path/to/directory" - assert config.access_config.host == "example.com" - assert config.access_config.port == 22 - - -def test_sftp_path_extraction_with_port(): - """Validate that the path extraction works for sftp with a non-default port""" - config = SimpleSftpConfig( - remote_url="sftp://example.com:47474/path/to/file.txt", - access_config=SftpAccessConfig(username="username", password="password", host="", port=22), - ) - assert config.file_path == "file.txt" - assert config.dir_path == "path/to" - assert config.path_without_protocol == "path/to" - assert config.access_config.host == "example.com" - assert config.access_config.port == 47474 diff --git a/test_unstructured_ingest/unit/connector/test_connector_git.py b/test_unstructured_ingest/unit/connector/test_connector_git.py deleted file mode 100644 index 88760df16..000000000 --- a/test_unstructured_ingest/unit/connector/test_connector_git.py +++ /dev/null @@ -1,61 +0,0 @@ -from pathlib import Path - -import pytest - -from unstructured.ingest.connector.git import GitAccessConfig, GitSourceConnector, SimpleGitConfig - - -@pytest.mark.parametrize( - ("given_file_path", "then_is_supported"), - [ - (Path("src/submodule/document.md"), True), - (Path("src/submodule/document.txt"), True), - (Path("src/submodule/document.pdf"), True), - (Path("src/submodule/document.doc"), True), - (Path("src/submodule/document.docx"), True), - (Path("src/submodule/document.eml"), True), - (Path("src/submodule/document.html"), True), - (Path("src/submodule/document.png"), True), - (Path("src/submodule/document.jpg"), True), - (Path("src/submodule/document.ppt"), True), - (Path("src/submodule/document.pptx"), True), - (Path("src/submodule/document.xml"), True), - (Path("src/submodule/code.py"), False), - (Path("src/submodule/Dockerfile"), False), - (Path("src/submodule/Makefile"), False), - (Path("src/submodule/LICENSE"), False), - ], -) -def test_connector_supports_file(given_file_path, then_is_supported): - when_is_supported = GitSourceConnector.is_file_type_supported(str(given_file_path)) - - assert when_is_supported == then_is_supported - - -class FakeGitSourceConnectorImpl(GitSourceConnector): - def get_ingest_docs(self): - pass - - -@pytest.mark.parametrize( - ("given_file_path", "given_file_glob", "then_matches_glob"), - [ - (Path("LICENSE"), None, True), - (Path("Makefile"), ["Makefile"], True), - (Path("src/my/super/module/main.py"), ["**/*.py"], True), - (Path("src/my/super/module/main.pyc"), ["**/*.py"], False), - ], -) -def test_connector_does_path_match_glob(given_file_path, given_file_glob, then_matches_glob): - connector_config = SimpleGitConfig( - url="some_fake_url", - 
access_config=GitAccessConfig(access_token="some_fake_token"), - file_glob=given_file_glob, - ) - connector = FakeGitSourceConnectorImpl( - processor_config=None, read_config=None, connector_config=connector_config - ) - - when_matches_glob = connector.does_path_match_glob(str(given_file_path)) - - assert when_matches_glob == then_matches_glob diff --git a/test_unstructured_ingest/unit/connector/test_salesforce_connector.py b/test_unstructured_ingest/unit/connector/test_salesforce_connector.py deleted file mode 100644 index 29643ec2b..000000000 --- a/test_unstructured_ingest/unit/connector/test_salesforce_connector.py +++ /dev/null @@ -1,57 +0,0 @@ -from pathlib import Path -from unittest.mock import MagicMock - -import pytest -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.primitives.asymmetric import dsa, ec, rsa - -from unstructured.ingest.connector.salesforce import SalesforceAccessConfig - - -def pkey_to_str(key) -> str: - return key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption(), - ).decode("utf-8") - - -def rsa_private_key() -> str: - return pkey_to_str(rsa.generate_private_key(0x10001, 2048)) - - -def brainpoolp512r1_private_key() -> str: - return pkey_to_str(ec.generate_private_key(ec.BrainpoolP512R1)) - - -def dsa_private_key() -> str: - return pkey_to_str(dsa.generate_private_key(1024)) - - -@pytest.mark.parametrize( - ("private_key", "private_key_type"), - [ - (rsa_private_key(), str), - (brainpoolp512r1_private_key(), str), - (dsa_private_key(), str), - ("some_path/priv.key", Path), - ], -) -def test_private_key_type(mocker, private_key, private_key_type): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = True - - config = SalesforceAccessConfig(consumer_key="asdf", private_key=private_key) - actual_pkey_value, actual_pkey_type = config.get_private_key_value_and_type() - assert actual_pkey_type == private_key_type - assert actual_pkey_value == private_key - - -def test_private_key_type_fail(mocker): - mocked_isfile: MagicMock = mocker.patch("pathlib.Path.is_file") - mocked_isfile.return_value = False - - given_nonexistent_path = "some_path/priv.key" - with pytest.raises(expected_exception=ValueError): - config = SalesforceAccessConfig(consumer_key="asdf", private_key=given_nonexistent_path) - config.get_private_key_value_and_type() diff --git a/test_unstructured_ingest/unit/connector/test_serialization.py b/test_unstructured_ingest/unit/connector/test_serialization.py deleted file mode 100644 index f7043e996..000000000 --- a/test_unstructured_ingest/unit/connector/test_serialization.py +++ /dev/null @@ -1,46 +0,0 @@ -from unstructured.ingest.connector.local import LocalIngestDoc, SimpleLocalConfig -from unstructured.ingest.connector.registry import ( - create_ingest_doc_from_dict, - create_ingest_doc_from_json, -) -from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig - -doc = LocalIngestDoc( - path="test_unstructured_ingest/example-docs/layout-parser-paper.pdf", - connector_config=SimpleLocalConfig(input_path="test_unstructured_ingest/example-docs/"), - processor_config=ProcessorConfig(), - read_config=ReadConfig(), -) -doc.update_source_metadata() -serialized_json = doc.to_json() -serialized_dict = doc.to_dict() - - -def test_manual_deserialization(): - deserialized_doc = LocalIngestDoc.from_json(serialized_json) - assert doc == deserialized_doc - - -def 
test_registry_from_json(): - deserialized_doc = create_ingest_doc_from_json(serialized_json) - assert doc == deserialized_doc - - -def test_registry_from_dict(): - deserialized_doc = create_ingest_doc_from_dict(serialized_dict) - assert doc == deserialized_doc - - -def test_source_metadata_serialization(): - doc = LocalIngestDoc( - path="test_unstructured_ingest/example-docs/layout-parser-paper.pdf", - connector_config=SimpleLocalConfig(input_path="test_unstructured_ingest/example-docs/"), - processor_config=ProcessorConfig(), - read_config=ReadConfig(), - ) - serialized_json = doc.to_dict() - assert not serialized_json["_source_metadata"] - - doc.update_source_metadata() - serialized_json_w_meta = doc.to_dict() - assert serialized_json_w_meta["_source_metadata"] diff --git a/test_unstructured_ingest/unit/connector/test_sharepoint.py b/test_unstructured_ingest/unit/connector/test_sharepoint.py deleted file mode 100644 index c48747fb9..000000000 --- a/test_unstructured_ingest/unit/connector/test_sharepoint.py +++ /dev/null @@ -1,59 +0,0 @@ -from datetime import datetime -from unittest.mock import MagicMock - -import pytest - -from unstructured.ingest.connector.sharepoint import SharepointIngestDoc -from unstructured.ingest.interfaces import ProcessorConfig, ReadConfig - - -@pytest.mark.parametrize( - ("time_created", "time_last_modified", "expected_created", "expected_modified"), - [ - ( - "2023-06-16T05:05:05+00:00", - datetime(2023, 6, 16, 5, 5, 5), - "2023-06-16T05:05:05+00:00", - "2023-06-16T05:05:05", - ), - ("2023-06-16 05:05:05", "2023-06-16", "2023-06-16T05:05:05", "2023-06-16T00:00:00"), - # Add more pairs of input strings and their expected ISO format results here - ], -) -def test_datetime_handling_in_update_source_metadata( - mocker, time_created, time_last_modified, expected_created, expected_modified -): - """Test the handling of various datetime formats in update_source_metadata.""" - # Create a mock SharePoint response directly in the test - mock_sharepoint_response = mocker.MagicMock() - mock_sharepoint_response.time_created = time_created - mock_sharepoint_response.time_last_modified = time_last_modified - - # Patch the SharePoint interaction methods to use the mock response - mocker.patch( - "unstructured.ingest.connector.sharepoint.SharepointIngestDoc._fetch_file", - return_value=mock_sharepoint_response, - ) - mocker.patch( - "unstructured.ingest.connector.sharepoint.SharepointIngestDoc._fetch_page", - return_value=None, - ) - - # Instantiate your document with dummy data - ingest_doc = SharepointIngestDoc( - connector_config=MagicMock(), - site_url="dummy_url", - server_path="dummy_path", - is_page=False, - file_path="dummy_path.html", - processor_config=ProcessorConfig(), - read_config=ReadConfig(), - ) - - # Execute the method under test - ingest_doc.update_source_metadata() - - # Assertions to verify the datetime handling against expected results - assert ingest_doc.source_metadata is not None - assert ingest_doc.source_metadata.date_created.startswith(expected_created) - assert ingest_doc.source_metadata.date_modified.startswith(expected_modified) diff --git a/test_unstructured_ingest/unit/connector/test_sql_conform_dict.py b/test_unstructured_ingest/unit/connector/test_sql_conform_dict.py deleted file mode 100644 index 45a8a44ef..000000000 --- a/test_unstructured_ingest/unit/connector/test_sql_conform_dict.py +++ /dev/null @@ -1,169 +0,0 @@ -import datetime -from unittest.mock import Mock, patch - -from unstructured.ingest.connector.sql import 
SqlDestinationConnector - -TEST_DATA_1 = { - "element_id": "80803034fe04181c163306740700cc54", - "metadata": { - "coordinates": { - "layout_height": 792, - "layout_width": 612, - "points": [ - [72.0, 72.69200000000001], - [72.0, 83.69200000000001], - [135.8, 83.69200000000001], - [135.8, 72.69200000000001], - ], - "system": "PixelSpace", - }, - "data_source": { - "date_created": "2023-10-25 10:05:44.976775", - "date_modified": "2023-10-25 10:05:44.976775", - "date_processed": "2023-12-14T17:06:33.074057", - "permissions_data": [{"mode": 33188}], - "url": "example-docs/pdf/fake-memo.pdf", - }, - "file_directory": "example-docs", - "filename": "fake-memo.pdf", - "filetype": "application/pdf", - "languages": ["eng"], - "last_modified": "2023-10-25T10:05:44", - "page_number": 1, - }, - "text": "May 5, 2023", - "type": "UncategorizedText", - "embeddings": [ - -0.05623878538608551, - 0.008579030632972717, - 0.03698136284947395, - -0.01745658740401268, - -0.030465232208371162, - 0.00996527448296547, - ], -} - -TEST_DATA_2 = { - "metadata": { - "coordinates": {"points": [1, 2, 3]}, - "links": {"link1": "https://example.com", "link2": "https://example.org"}, - "data_source": { - "date_created": "2021-01-01T00:00:00", - "date_modified": "2021-01-02T00:00:00", - "date_processed": "2022-12-13T15:44:08", - "version": 1.1, - }, - "last_modified": "2021-01-03T00:00:00", - "page_number": 10, - }, - "embeddings": [0.1, 0.2, 0.3], -} - -TEST_DATA_3 = { - "metadata": { - "coordinates": {"points": [1, 2, 3]}, - "data_source": { - "date_created": "2021-01-01T00:00:00", - "date_modified": "2021-01-02T00:00:00", - "date_processed": "2022-12-13T15:44:08", - "version": 1.1, - }, - "last_modified": "2021-01-03T00:00:00", - "page_number": 10, - "link_texts": ["Skip to main content"], - "link_urls": ["#main-content"], - }, - "embeddings": [0.1, 0.2, 0.3], -} - - -def test_conform_dict_1(): - """Validate that the conform_dict method returns the expected output for a real example""" - # Create a mock instance of the connector class - connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock()) - - # Mock the uuid.uuid4 function to return a fixed value - with patch("uuid.uuid4", return_value="mocked_uuid"): - # Call the conform_dict method - data_out = TEST_DATA_1.copy() - connector.conform_dict(data_out) - - # Assert that the result matches the expected output - assert data_out == { - "element_id": "80803034fe04181c163306740700cc54", - "text": "May 5, 2023", - "type": "UncategorizedText", - "id": "mocked_uuid", - "file_directory": "example-docs", - "filename": "fake-memo.pdf", - "filetype": "application/pdf", - "languages": ["eng"], - "last_modified": datetime.datetime(2023, 10, 25, 10, 5, 44), - "page_number": "1", - "date_created": datetime.datetime(2023, 10, 25, 10, 5, 44, 976775), - "date_modified": datetime.datetime(2023, 10, 25, 10, 5, 44, 976775), - "date_processed": datetime.datetime(2023, 12, 14, 17, 6, 33, 74057), - "permissions_data": '[{"mode": 33188}]', - "url": "example-docs/pdf/fake-memo.pdf", - "layout_height": 792, - "layout_width": 612, - "points": "[[72.0, 72.69200000000001], [72.0, 83.69200000000001]," - " [135.8, 83.69200000000001], [135.8, 72.69200000000001]]", - "system": "PixelSpace", - "embeddings": "[-0.05623878538608551, 0.008579030632972717, " - "0.03698136284947395, -0.01745658740401268, " - "-0.030465232208371162, 0.00996527448296547]", - } - - -def test_conform_dict_2(): - """Validate that the conform_dict method returns the expected output for a simplified example""" 
- # Create a mock instance of the connector class - connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock()) - - # Mock the uuid.uuid4 function to return a fixed value - with patch("uuid.uuid4", return_value="mocked_uuid"): - # Call the conform_dict method - data_out = TEST_DATA_2.copy() - connector.conform_dict(data_out) - - # Assert that the result matches the expected output - assert data_out == { - "embeddings": "[0.1, 0.2, 0.3]", - "id": "mocked_uuid", - "links": '{"link1": "https://example.com", "link2": "https://example.org"}', - "last_modified": datetime.datetime(2021, 1, 3, 0, 0), - "page_number": "10", - "date_created": datetime.datetime(2021, 1, 1, 0, 0), - "date_modified": datetime.datetime(2021, 1, 2, 0, 0), - "date_processed": datetime.datetime(2022, 12, 13, 15, 44, 8), - "version": "1.1", - "points": "[1, 2, 3]", - } - - -def test_conform_dict_link_texts(): - """Validate that the conform_dict method returns the expected output link_texts""" - # Create a mock instance of the connector class - connector = SqlDestinationConnector(write_config=Mock(), connector_config=Mock()) - - # Mock the uuid.uuid4 function to return a fixed value - with patch("uuid.uuid4", return_value="mocked_uuid"): - # Call the conform_dict method - data_out = TEST_DATA_3.copy() - connector.conform_dict(data_out) - - # Assert that the result matches the expected output - assert data_out == { - "embeddings": "[0.1, 0.2, 0.3]", - "id": "mocked_uuid", - "last_modified": datetime.datetime(2021, 1, 3, 0, 0), - "link_texts": ["Skip to main content"], - "link_urls": ["#main-content"], - "page_number": "10", - "date_created": datetime.datetime(2021, 1, 1, 0, 0), - "date_modified": datetime.datetime(2021, 1, 2, 0, 0), - "date_processed": datetime.datetime(2022, 12, 13, 15, 44, 8), - "version": "1.1", - "points": "[1, 2, 3]", - } diff --git a/test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py b/test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py deleted file mode 100644 index 7e1727d1e..000000000 --- a/test_unstructured_ingest/unit/enhanced_dataclass/test_enhanced_dataclass.py +++ /dev/null @@ -1,60 +0,0 @@ -import json -from dataclasses import Field, dataclass, fields - -import pytest - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.enhanced_dataclass.dataclasses import EnhancedField - - -@dataclass -class AuthData(EnhancedDataClassJsonMixin): - username: str - password: str = enhanced_field(sensitive=True) - date: int = enhanced_field(overload_name="time") - - -auth = AuthData(username="my name", password="top secret", date=3) - - -def test_enhanced_field(): - fs = fields(AuthData) - for f in fs: - if f.name == "username": - assert isinstance(f, Field) - assert hasattr(f, "sensitive") is False - else: - assert isinstance(f, EnhancedField) - if f.name == "password": - assert f.sensitive is True - else: - assert not f.sensitive - - -@pytest.mark.parametrize( - ("apply_name_overload", "expected_dict"), - [ - (True, {"username": "my name", "password": "THIS IS REDACTED", "time": 3}), - (False, {"username": "my name", "password": "THIS IS REDACTED", "date": 3}), - ], -) -def test_to_json(apply_name_overload: bool, expected_dict: dict): - j = auth.to_json( - redact_sensitive=True, - redacted_text="THIS IS REDACTED", - apply_name_overload=apply_name_overload, - ) - expected = json.dumps(expected_dict) - assert j == expected - - -@pytest.mark.parametrize( - 
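The redaction machinery these enhanced-dataclass tests exercised (enhanced_field(sensitive=True) plus redact_sensitive=True at serialization time) is replaced in the new code by pydantic's SecretStr, as the bedrock.py hunk at the end of this patch shows. For contrast, a minimal sketch of the replacement pattern; AuthData here mirrors the deleted test fixture and is not part of the library:

    from pydantic import BaseModel, SecretStr

    class AuthData(BaseModel):
        username: str
        password: SecretStr

    auth = AuthData(username="my name", password="top secret")
    print(auth)                              # password=SecretStr('**********')
    print(auth.password.get_secret_value())  # explicit opt-in to the raw value

Instead of passing redact flags when serializing, the field type itself keeps the value out of repr, str, and pydantic's JSON dumps.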
("apply_name_overload", "expected_dict"), - [ - (True, {"username": "my name", "password": "***REDACTED***", "time": 3}), - (False, {"username": "my name", "password": "***REDACTED***", "date": 3}), - ], -) -def test_to_dict(apply_name_overload: bool, expected_dict: dict): - d = auth.to_dict(redact_sensitive=True, apply_name_overload=apply_name_overload) - assert d == expected_dict diff --git a/test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py b/test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py deleted file mode 100644 index 433ee810d..000000000 --- a/test_unstructured_ingest/unit/pipeline/reformat/test_chunking.py +++ /dev/null @@ -1,156 +0,0 @@ -from __future__ import annotations - -import json -import logging -import os - -import pytest -from _pytest.logging import LogCaptureFixture - -from test_unstructured.unit_utils import ( - FixtureRequest, - Mock, - example_doc_path, - function_mock, - method_mock, -) -from unstructured.documents.elements import CompositeElement -from unstructured.ingest.interfaces import ChunkingConfig, PartitionConfig -from unstructured.ingest.pipeline.interfaces import PipelineContext -from unstructured.ingest.pipeline.reformat.chunking import Chunker - -ELEMENTS_JSON_FILE = example_doc_path( - "test_evaluate_files/unstructured_output/Bank Good Credit Loan.pptx.json" -) - - -class DescribeChunker: - """Unit tests for ingest.pipeline.reformat.chunking.Chunker""" - - # -- Chunker.run() ----------------------------------------------------------------------------- - - # -- integration test -- - def it_creates_JSON_elements(self, _ingest_docs_map_: Mock, tmpdir: str): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_title"), - pipeline_context=PipelineContext(work_dir=tmpdir), - partition_config=PartitionConfig(), - ) - # -- `Chunker.chunk()` defaults to writing to "{work_dir}/chunked", which is located in - # -- "/.cache" of a user's profile. 
- # -- Define `work_dir` add the "/chunked" subdirectory to it: - os.makedirs(os.path.join(tmpdir, "chunked"), exist_ok=True) - - filename = chunker.run(ELEMENTS_JSON_FILE) or "" - - head, tail = os.path.split(filename if filename else "") - # -- Check that a json file was created in `/chunked` -- - assert head.endswith("chunked") - assert tail.endswith(".json") - # -- Check contents of file -- - with open(filename) as json_f: - json_data = json.load(json_f) - assert all(d.get("type") == "CompositeElement" for d in json_data) - assert len(json_data) == 5 - - def it_returns_None_and_logs_message_without_chunking_strategy( - self, _ingest_docs_map_: Mock, caplog: LogCaptureFixture - ): - chunker = Chunker( - chunking_config=ChunkingConfig(), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(), - ) - caplog.set_level(logging.INFO) - - assert chunker.run(ELEMENTS_JSON_FILE) is None - assert "chunking_strategy is None, skipping chunking for" in caplog.text - - def it_logs_error_on_invalid_remote_chunking_strategy( - self, _ingest_docs_map_: Mock, caplog: LogCaptureFixture - ): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_invalid"), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(partition_by_api=True), - ) - - chunker.run(ELEMENTS_JSON_FILE) - - assert "Input should be 'basic', 'by_page', 'by_similarity'" in caplog.text - - def it_warns_with_nonlocal_chunking_strategy_and_partition_by_api_False( - self, _ingest_docs_map_: Mock, caplog: LogCaptureFixture - ): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_similarity"), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(partition_by_api=False), - ) - - chunker.run(ELEMENTS_JSON_FILE) - - assert "There is no locally available chunking_strategy:" in caplog.text - - # -- Chunker.chunk() --------------------------------------------------------------------------- - - def it_skips_chunking_if_strategy_is_None(self): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy=None), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(), - ) - - assert chunker.chunk(ELEMENTS_JSON_FILE) is None - - # -- integration test -- - @pytest.mark.parametrize("strategy", ["by_title", "basic"]) - def it_chunks_locally(self, strategy: str, _ingest_docs_map_: Mock): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy=strategy), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig(), - ) - - chunked_elements = chunker.chunk(ELEMENTS_JSON_FILE) - - assert all(isinstance(elem, CompositeElement) for elem in chunked_elements) # type: ignore - - def it_chunks_remotely(self, _ingest_docs_map_: Mock, _partition_via_api_: Mock): - chunker = Chunker( - chunking_config=ChunkingConfig(chunking_strategy="by_similarity"), - pipeline_context=PipelineContext(), - partition_config=PartitionConfig( - partition_by_api=True, api_key="aaaaaaaaaaaaaaaaaaaaa" - ), - ) - - chunker.chunk(ELEMENTS_JSON_FILE) - - _partition_via_api_.assert_called_once_with( - filename=ELEMENTS_JSON_FILE, - api_key="aaaaaaaaaaaaaaaaaaaaa", - api_url="https://api.unstructured.io/general/v0/general", - chunking_strategy="by_similarity", - # (jennings) the sdk uses combine_under_n_chars but the ChunkingConfig param is - # combine_text_under_n_chars - combine_under_n_chars=None, - include_orig_elements=None, - max_characters=None, - multipage_sections=None, - new_after_n_chars=None, - overlap=None, - 
overlap_all=None, - ) - - # -- fixtures -------------------------------------------------------------------------------- - - @pytest.fixture() - def _ingest_docs_map_(self, request: FixtureRequest): - return method_mock(request, PipelineContext, "ingest_docs_map") - - @pytest.fixture() - def _partition_via_api_(self, request: FixtureRequest): - return function_mock( - request, "unstructured.ingest.pipeline.reformat.chunking.partition_via_api" - ) diff --git a/test_unstructured_ingest/unit/test_error.py b/test_unstructured_ingest/unit/test_error.py deleted file mode 100644 index 0c588409e..000000000 --- a/test_unstructured_ingest/unit/test_error.py +++ /dev/null @@ -1,27 +0,0 @@ -import pytest - -from unstructured.ingest.error import ( - DestinationConnectionError, - PartitionError, - SourceConnectionError, -) - - -@pytest.mark.parametrize( - ("error_class", "exception_type", "error_message"), - [ - (SourceConnectionError, ValueError, "Simulated connection error"), - (DestinationConnectionError, RuntimeError, "Simulated connection error"), - (PartitionError, FileNotFoundError, "Simulated partition error"), - ], -) -def test_custom_error_decorator(error_class, exception_type, error_message): - @error_class.wrap - def simulate_error(): - raise exception_type(error_message) - - with pytest.raises(error_class) as context: - simulate_error() - - expected_error_string = error_class.error_string.format(error_message) - assert str(context.value) == expected_error_string diff --git a/test_unstructured_ingest/unit/test_interfaces.py b/test_unstructured_ingest/unit/test_interfaces.py deleted file mode 100644 index 7a91ed9f1..000000000 --- a/test_unstructured_ingest/unit/test_interfaces.py +++ /dev/null @@ -1,281 +0,0 @@ -from __future__ import annotations - -import os -import pathlib -from dataclasses import dataclass -from typing import Any, Dict - -import pytest - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - ChunkingConfig, - PartitionConfig, - ProcessorConfig, - ReadConfig, -) -from unstructured.partition.auto import partition -from unstructured.staging.base import elements_to_dicts - -DIRECTORY = pathlib.Path(__file__).parent.resolve() -EXAMPLE_DOCS_DIRECTORY = os.path.join(DIRECTORY, "../..", "example-docs") -TEST_DOWNLOAD_DIR = "/tmp" -TEST_OUTPUT_DIR = "/tmp" -TEST_ID = "test" -TEST_FILE_PATH = os.path.join(EXAMPLE_DOCS_DIRECTORY, "book-war-and-peace-1p.txt") - - -@dataclass -class ExampleConfig(BaseConnectorConfig): - id: str - path: str - - -TEST_CONFIG = ExampleConfig(id=TEST_ID, path=TEST_FILE_PATH) -TEST_SOURCE_URL = "test-source-url" -TEST_VERSION = "1.1.1" -TEST_RECORD_LOCATOR = {"id": "data-source-id"} -TEST_DATE_CREATED = "2021-01-01T00:00:00" -TEST_DATE_MODIFIED = "2021-01-02T00:00:00" -TEST_DATE_PROCESSSED = "2022-12-13T15:44:08" - - -@dataclass -class ExampleIngestDoc(BaseSingleIngestDoc): - connector_config: ExampleConfig - - @property - def filename(self): - return TEST_FILE_PATH - - @property - def _output_filename(self): - return TEST_FILE_PATH + ".json" - - @property - def source_url(self) -> str: - return TEST_SOURCE_URL - - @property - def version(self) -> str: - return TEST_VERSION - - @property - def record_locator(self) -> Dict[str, Any]: - return TEST_RECORD_LOCATOR - - @property - def date_created(self) -> str: - return TEST_DATE_CREATED - - @property - def date_modified(self) -> str: - return TEST_DATE_MODIFIED - - @property - def exists(self) -> bool: - 
return True - - def cleanup_file(self): - pass - - def get_file(self): - pass - - def has_output(self): - return True - - def write_result(self, result): - pass - - -@pytest.fixture() -def partition_test_results(): - # Reusable partition test results, calculated only once - result = partition( - filename=str(TEST_FILE_PATH), - data_source_metadata=DataSourceMetadata( - url=TEST_SOURCE_URL, - version=TEST_VERSION, - record_locator=TEST_RECORD_LOCATOR, - date_created=TEST_DATE_CREATED, - date_modified=TEST_DATE_MODIFIED, - date_processed=TEST_DATE_PROCESSSED, - ), - ) - return result - - -@pytest.fixture() -def partition_file_test_results(partition_test_results): - # Reusable partition_file test results, calculated only once - return elements_to_dicts(partition_test_results) - - -def test_partition_file(): - """Validate partition_file returns a list of dictionaries with the expected keys, - metadatakeys, and data source metadata values.""" - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR), - ) - test_ingest_doc._date_processed = TEST_DATE_PROCESSSED - elements = test_ingest_doc.partition_file(partition_config=PartitionConfig()) - element_dicts = elements_to_dicts(elements) - assert len(element_dicts) - expected_keys = { - "element_id", - "text", - "type", - "metadata", - } - # The document in TEST_FILE_PATH does not have elements with coordinates so - # partition is not expected to return coordinates metadata. - expected_metadata_keys = { - "data_source", - "filename", - "file_directory", - "filetype", - "languages", - "last_modified", - } - for elem in element_dicts: - # Parent IDs are non-deterministic - remove them from the test - elem["metadata"].pop("parent_id", None) - - assert expected_keys == set(elem.keys()) - assert expected_metadata_keys == set(elem["metadata"].keys()) - data_source_metadata = elem["metadata"]["data_source"] - assert data_source_metadata["url"] == TEST_SOURCE_URL - assert data_source_metadata["version"] == TEST_VERSION - assert data_source_metadata["record_locator"] == TEST_RECORD_LOCATOR - assert data_source_metadata["date_created"] == TEST_DATE_CREATED - assert data_source_metadata["date_modified"] == TEST_DATE_MODIFIED - assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSSED - - -def test_process_file_fields_include_default(mocker, partition_test_results): - """Validate when metadata_include and metadata_exclude are not set, all fields: - ("element_id", "text", "type", "metadata") are included""" - mock_partition = mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR), - ) - elements = test_ingest_doc.partition_file(partition_config=PartitionConfig()) - element_dicts = elements_to_dicts(elements) - assert len(element_dicts) - assert mock_partition.call_count == 1 - for elem in element_dicts: - # Parent IDs are non-deterministic - remove them from the test - elem["metadata"].pop("parent_id", None) - - assert {"element_id", "text", "type", "metadata"} == set(elem.keys()) - data_source_metadata = elem["metadata"]["data_source"] - assert data_source_metadata["url"] == TEST_SOURCE_URL - assert data_source_metadata["version"] == TEST_VERSION - assert 
data_source_metadata["record_locator"] == TEST_RECORD_LOCATOR - assert data_source_metadata["date_created"] == TEST_DATE_CREATED - assert data_source_metadata["date_modified"] == TEST_DATE_MODIFIED - assert data_source_metadata["date_processed"] == TEST_DATE_PROCESSSED - - -def test_process_file_metadata_includes_filename_and_filetype( - mocker, - partition_test_results, -): - """Validate when metadata_include is set to "filename,filetype", - only filename is included in metadata""" - mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - partition_config = PartitionConfig( - metadata_include=["filename", "filetype"], - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig(output_dir=TEST_OUTPUT_DIR), - ) - isd_elems = test_ingest_doc.process_file(partition_config=partition_config) - assert len(isd_elems) - for elem in isd_elems: - # Parent IDs are non-deterministic - remove them from the test - elem["metadata"].pop("parent_id", None) - - assert set(elem["metadata"].keys()) == {"filename", "filetype"} - - -def test_process_file_metadata_exclude_filename_pagenum(mocker, partition_test_results): - """Validate when metadata_exclude is set to "filename,page_number", - neither filename nor page_number are included in metadata""" - mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - partition_config = PartitionConfig( - metadata_exclude=["filename", "page_number"], - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig( - output_dir=TEST_OUTPUT_DIR, - ), - ) - isd_elems = test_ingest_doc.process_file(partition_config=partition_config) - assert len(isd_elems) - for elem in isd_elems: - assert "filename" not in elem["metadata"] - assert "page_number" not in elem["metadata"] - - -def test_process_file_flatten_metadata(mocker, partition_test_results): - mocker.patch( - "unstructured.partition.auto.partition", - return_value=partition_test_results, - ) - partition_config = PartitionConfig( - metadata_include=["filename", "file_directory", "filetype"], - flatten_metadata=True, - ) - test_ingest_doc = ExampleIngestDoc( - connector_config=TEST_CONFIG, - read_config=ReadConfig(download_dir=TEST_DOWNLOAD_DIR), - processor_config=ProcessorConfig( - output_dir=TEST_OUTPUT_DIR, - ), - ) - isd_elems = test_ingest_doc.process_file(partition_config=partition_config) - expected_keys = {"element_id", "text", "type", "filename", "file_directory", "filetype"} - for elem in isd_elems: - assert expected_keys == set(elem.keys()) - - -class DescribeChunkingConfig: - """Unit tests for unstructured.ingest.interfaces.ChunkingConfig""" - - def it_accepts_chunking_strategy_by_itself(self): - config = ChunkingConfig(chunking_strategy="basic") - assert config.chunking_strategy == "basic" - - def it_defaults_to_chunk_by_title_if_only_chunk_elements_is_True(self): - config = ChunkingConfig(chunk_elements=True) - assert config.chunking_strategy == "by_title" - - def but_it_defaults_to_chunking_strategy_over_chunk_elements(self): - config = ChunkingConfig(chunk_elements=True, chunking_strategy="basic") - assert config.chunking_strategy == "basic" - - def it_silently_accepts_unrecognized_chunker(self, caplog: pytest.LogCaptureFixture): - config = ChunkingConfig(chunking_strategy="foobar") - assert 
config.chunking_strategy == "foobar" - assert caplog.text == "" diff --git a/test_unstructured_ingest/unit/test_logger.py b/test_unstructured_ingest/unit/test_logger.py deleted file mode 100644 index 4f15aba4c..000000000 --- a/test_unstructured_ingest/unit/test_logger.py +++ /dev/null @@ -1,78 +0,0 @@ -import json - -import pytest - -from unstructured.ingest.logger import ( - default_is_data_sensitive, - hide_sensitive_fields, - redact_jsons, -) - - -@pytest.mark.parametrize( - ("key", "value", "is_sensitive"), - [ - ("username", "john_smith", False), - ("password", "13?H%", True), - ("token", "123", True), - ("AWS_CREDENTIAL", "aws_credential", True), - ("AWS_KEY", None, False), - ], -) -def test_default_is_sensitive(key, value, is_sensitive): - assert default_is_data_sensitive(key, value) == is_sensitive - - -def test_hide_sensitive_fields(): - d = { - "username": "john_smith", - "password": "13?H%", - "inner": { - "token": "123", - "AWS_KEY": None, - "inner_j_string": json.dumps( - {"account_name": "secret name", "client_id": 123, "timestamp": 123} - ), - }, - } - redacted_d = hide_sensitive_fields(d) - expected_d = { - "password": "*******", - "username": "john_smith", - "inner": { - "token": "*******", - "AWS_KEY": None, - "inner_j_string": json.dumps( - {"account_name": "*******", "client_id": "*******", "timestamp": 123} - ), - }, - } - assert redacted_d == expected_d - - -def test_redact_jsons(): - d1 = { - "username": "john_smith", - "password": "13?H%", - "inner": { - "token": "123", - "AWS_KEY": None, - "inner_j_string": json.dumps( - {"account_name": "secret name", "client_id": 123, "timestamp": 123} - ), - }, - } - - d2 = {"username": "tim67", "update_time": 456} - d3 = {"account_name": "top secret", "host": "http://localhost:8888"} - - sensitive_string = f"Some topic secret info ({json.dumps(d1)} regarding {d2} and {d3})" - expected_string = ( - 'Some topic secret info ({"username": "john_smith", "password": "*******", ' - '"inner": {"token": "*******", "AWS_KEY": null, "inner_j_string": ' - '"{\\"account_name\\": \\"*******\\", \\"client_id\\": \\"*******\\", ' - '\\"timestamp\\": 123}"}} regarding {"username": "tim67", "update_time": 456} ' - 'and {"account_name": "*******", "host": "http://localhost:8888"})' - ) - redacted_string = redact_jsons(sensitive_string) - assert redacted_string == expected_string diff --git a/test_unstructured_ingest/unit/test_utils.py b/test_unstructured_ingest/unit/test_utils.py deleted file mode 100644 index bf2556cbe..000000000 --- a/test_unstructured_ingest/unit/test_utils.py +++ /dev/null @@ -1,164 +0,0 @@ -import json -import typing as t -from dataclasses import dataclass, field -from datetime import datetime - -import pytest -import pytz - -from unstructured.ingest.cli.utils import extract_config -from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.utils.string_and_date_utils import ensure_isoformat_datetime, json_to_dict - - -@dataclass -class A(BaseConfig): - a: str - - -@dataclass -class B(BaseConfig): - a: A - b: int - - -flat_data = {"a": "test", "b": 4, "c": True} - - -def test_extract_config_concrete(): - @dataclass - class C(BaseConfig): - b: B - c: bool - - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_optional(): - @dataclass - class C(BaseConfig): - c: bool - b: t.Optional[B] = None - - c = 
extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_union(): - @dataclass - class C(BaseConfig): - c: bool - b: t.Optional[t.Union[B, int]] = None - - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": 4, "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_list(): - @dataclass - class C(BaseConfig): - c: t.List[int] - b: B - - flat_data = {"a": "test", "b": 4, "c": [1, 2, 3]} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_optional_list(): - @dataclass - class C(BaseConfig): - b: B - c: t.Optional[t.List[int]] = None - - flat_data = {"a": "test", "b": 4, "c": [1, 2, 3]} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": {"a": {"a": "test"}, "b": 4}, "c": [1, 2, 3]} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_dataclass_list(): - @dataclass - class C(BaseConfig): - c: bool - b: t.List[B] = field(default_factory=list) - - flat_data = {"a": "test", "c": True} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"b": [], "c": True} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_extract_config_dict(): - @dataclass - class C(BaseConfig): - c: bool - b: t.Dict[str, B] = field(default_factory=dict) - - flat_data = {"c": True} - c = extract_config(flat_data=flat_data, config=C) - expected_result = {"c": True, "b": {}} - assert c.to_json(sort_keys=True) == json.dumps(expected_result, sort_keys=True) - - -def test_json_to_dict_valid_json(): - json_string = '{"key": "value"}' - expected_result = {"key": "value"} - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), dict) - - -def test_json_to_dict_malformed_json(): - json_string = '{"key": "value"' - expected_result = '{"key": "value"' - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), str) - - -def test_json_to_dict_single_quotes(): - json_string = "{'key': 'value'}" - expected_result = {"key": "value"} - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), dict) - - -def test_json_to_dict_path(): - json_string = "/path/to/file.json" - expected_result = "/path/to/file.json" - assert json_to_dict(json_string) == expected_result - assert isinstance(json_to_dict(json_string), str) - - -def test_ensure_isoformat_datetime_for_datetime(): - dt = ensure_isoformat_datetime(datetime(2021, 1, 1, 12, 0, 0)) - assert dt == "2021-01-01T12:00:00" - - -def test_ensure_isoformat_datetime_for_datetime_with_tz(): - dt = ensure_isoformat_datetime(datetime(2021, 1, 1, 12, 0, 0, tzinfo=pytz.UTC)) - assert dt == "2021-01-01T12:00:00+00:00" - - -def test_ensure_isoformat_datetime_for_string(): - dt = ensure_isoformat_datetime("2021-01-01T12:00:00") - assert dt == "2021-01-01T12:00:00" - - -def test_ensure_isoformat_datetime_for_string2(): - dt = ensure_isoformat_datetime("2021-01-01T12:00:00+00:00") - assert dt == "2021-01-01T12:00:00+00:00" - - -def test_ensure_isoformat_datetime_fails_on_string(): - with 
pytest.raises(ValueError): - ensure_isoformat_datetime("bad timestamp") - - -def test_ensure_isoformat_datetime_fails_on_int(): - with pytest.raises(TypeError): - ensure_isoformat_datetime(1111) diff --git a/unstructured/__version__.py b/unstructured/__version__.py index e794a070a..65162b438 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.15.15-dev0" # pragma: no cover +__version__ = "0.16.0" # pragma: no cover diff --git a/unstructured/embed/bedrock.py b/unstructured/embed/bedrock.py index dba52e776..b667e9558 100644 --- a/unstructured/embed/bedrock.py +++ b/unstructured/embed/bedrock.py @@ -1,62 +1,69 @@ from dataclasses import dataclass -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, List import numpy as np +from pydantic import SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_community.embeddings import BedrockEmbeddings -@dataclass class BedrockEmbeddingConfig(EmbeddingConfig): - aws_access_key_id: str = enhanced_field(sensitive=True) - aws_secret_access_key: str = enhanced_field(sensitive=True) + aws_access_key_id: SecretStr + aws_secret_access_key: SecretStr region_name: str = "us-west-2" + @requires_dependencies( + ["boto3", "numpy", "langchain_community"], + extras="bedrock", + ) + def get_client(self) -> "BedrockEmbeddings": + # delay import only when needed + import boto3 + from langchain_community.embeddings import BedrockEmbeddings + + bedrock_runtime = boto3.client( + service_name="bedrock-runtime", + aws_access_key_id=self.aws_access_key_id.get_secret_value(), + aws_secret_access_key=self.aws_secret_access_key.get_secret_value(), + region_name=self.region_name, + ) + + bedrock_client = BedrockEmbeddings(client=bedrock_runtime) + return bedrock_client + @dataclass class BedrockEmbeddingEncoder(BaseEmbeddingEncoder): config: BedrockEmbeddingConfig - _client: Optional["BedrockEmbeddings"] = enhanced_field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = enhanced_field(init=False, default=None) - @property - def client(self) -> "BedrockEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("Q") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="Q") def __post_init__(self): self.initialize() - def initialize(self): - self.bedrock_client = self.create_client() - def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - return np.array(self.bedrock_client.embed_query(query)) + bedrock_client = self.config.get_client() + return np.array(bedrock_client.embed_query(query)) def embed_documents(self, elements: List[Element]) -> List[Element]: - 
embeddings = self.bedrock_client.embed_documents([str(e) for e in elements]) + bedrock_client = self.config.get_client() + embeddings = bedrock_client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -67,18 +74,3 @@ class BedrockEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["boto3", "numpy", "langchain_community"], - extras="bedrock", - ) - def create_client(self) -> "BedrockEmbeddings": - # delay import only when needed - import boto3 - from langchain_community.embeddings import BedrockEmbeddings - - bedrock_runtime = boto3.client(service_name="bedrock-runtime", **self.config.to_dict()) - - bedrock_client = BedrockEmbeddings(client=bedrock_runtime) - return bedrock_client diff --git a/unstructured/embed/huggingface.py b/unstructured/embed/huggingface.py index cb98be0e8..d955f7053 100644 --- a/unstructured/embed/huggingface.py +++ b/unstructured/embed/huggingface.py @@ -1,60 +1,59 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_huggingface.embeddings import HuggingFaceEmbeddings -@dataclass class HuggingFaceEmbeddingConfig(EmbeddingConfig): - model_name: Optional[str] = "sentence-transformers/all-MiniLM-L6-v2" - model_kwargs: Optional[dict] = field(default_factory=lambda: {"device": "cpu"}) - encode_kwargs: Optional[dict] = field(default_factory=lambda: {"normalize_embeddings": False}) - cache_folder: Optional[dict] = None + model_name: Optional[str] = Field(default="sentence-transformers/all-MiniLM-L6-v2") + model_kwargs: Optional[dict] = Field(default_factory=lambda: {"device": "cpu"}) + encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False}) + cache_folder: Optional[str] = Field(default=None) + + @requires_dependencies( + ["langchain_huggingface"], + extras="embed-huggingface", + ) + def get_client(self) -> "HuggingFaceEmbeddings": + """Creates a langchain Huggingface python client to embed elements.""" + from langchain_huggingface.embeddings import HuggingFaceEmbeddings + + client = HuggingFaceEmbeddings(**self.dict()) + return client @dataclass class HuggingFaceEmbeddingEncoder(BaseEmbeddingEncoder): config: HuggingFaceEmbeddingConfig - _client: Optional["HuggingFaceEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "HuggingFaceEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("Q") - return self._exemplary_embedding - - def initialize(self): - """Creates a langchain HuggingFace object to embed elements.""" - _ = self.client + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="Q") def num_of_dimensions(self): - return
np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - return self.client.embed_query(str(query)) + client = self.config.get_client() + return client.embed_query(str(query)) def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -66,15 +65,3 @@ class HuggingFaceEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["langchain_huggingface"], - extras="embed-huggingface", - ) - def create_client(self) -> "HuggingFaceEmbeddings": - """Creates a langchain Huggingface python client to embed elements.""" - from langchain_huggingface.embeddings import HuggingFaceEmbeddings - - client = HuggingFaceEmbeddings(**self.config.to_dict()) - return client diff --git a/unstructured/embed/interfaces.py b/unstructured/embed/interfaces.py index e98c0c902..a6b0a3665 100644 --- a/unstructured/embed/interfaces.py +++ b/unstructured/embed/interfaces.py @@ -2,17 +2,17 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from typing import List, Tuple +from pydantic import BaseModel + from unstructured.documents.elements import Element -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -@dataclass -class EmbeddingConfig(EnhancedDataClassJsonMixin): +class EmbeddingConfig(BaseModel): pass @dataclass -class BaseEmbeddingEncoder(EnhancedDataClassJsonMixin, ABC): +class BaseEmbeddingEncoder(ABC): config: EmbeddingConfig @abstractmethod diff --git a/unstructured/embed/mixedbreadai.py b/unstructured/embed/mixedbreadai.py index 656d41e99..d89db571f 100644 --- a/unstructured/embed/mixedbreadai.py +++ b/unstructured/embed/mixedbreadai.py @@ -3,10 +3,10 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import Element from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies USER_AGENT = "@mixedbread-ai/unstructured" @@ -22,7 +22,6 @@ if TYPE_CHECKING: from mixedbread_ai.core import RequestOptions -@dataclass class MixedbreadAIEmbeddingConfig(EmbeddingConfig): """ Configuration class for Mixedbread AI Embedding Encoder. @@ -32,14 +31,31 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig): model_name (str): Name of the model to use for embeddings. 
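
The hunks above repeat one refactor across every embedder: the provider config becomes a pydantic model that owns client construction, and the encoder builds its client on demand instead of caching `_client`/`_exemplary_embedding` state on the dataclass. A minimal sketch of that shape, using illustrative names only (not an API from this repo):

```python
# Illustrative sketch of the config-owns-the-client pattern shown in these
# diffs; the names here are hypothetical, not unstructured's public API.
from dataclasses import dataclass

from pydantic import BaseModel, SecretStr


class ExampleEmbeddingConfig(BaseModel):
    api_key: SecretStr
    model_name: str = "example-model"

    def get_client(self) -> object:
        # A real config would lazily import its SDK here and pass
        # self.api_key.get_secret_value() through to it.
        return object()


@dataclass
class ExampleEmbeddingEncoder:
    config: ExampleEmbeddingConfig

    def embed_query(self, query: str) -> list:
        client = self.config.get_client()  # built per call, never cached on self
        _ = client  # a real encoder would call client.embed_query(query)
        return [0.0]
```
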
""" - api_key: str = field( - default_factory=lambda: os.environ.get("MXBAI_API_KEY"), + api_key: SecretStr = Field( + default_factory=lambda: SecretStr(os.environ.get("MXBAI_API_KEY")), ) - model_name: str = field( + model_name: str = Field( default="mixedbread-ai/mxbai-embed-large-v1", ) + @requires_dependencies( + ["mixedbread_ai"], + extras="embed-mixedbreadai", + ) + def get_client(self) -> "MixedbreadAI": + """ + Create the Mixedbread AI client. + + Returns: + MixedbreadAI: Initialized client. + """ + from mixedbread_ai.client import MixedbreadAI + + return MixedbreadAI( + api_key=self.api_key.get_secret_value(), + ) + @dataclass class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): @@ -52,23 +68,12 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): config: MixedbreadAIEmbeddingConfig - _client: Optional["MixedbreadAI"] = field(init=False, default=None) _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) _request_options: Optional["RequestOptions"] = field(init=False, default=None) - @property - def client(self) -> "MixedbreadAI": - """Lazy initialization of the Mixedbread AI client.""" - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: + def get_exemplary_embedding(self) -> List[float]: """Get an exemplary embedding to determine dimensions and unit vector status.""" - if self._exemplary_embedding is None: - self._exemplary_embedding = self._embed(["Q"])[0] - return self._exemplary_embedding + return self._embed(["Q"])[0] def initialize(self): if self.config.api_key is None: @@ -89,12 +94,14 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): @property def num_of_dimensions(self): """Get the number of dimensions for the embeddings.""" - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) @property def is_unit_vector(self) -> bool: """Check if the embedding is a unit vector.""" - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def _embed(self, texts: List[str]) -> List[List[float]]: """ @@ -110,10 +117,10 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): batch_itr = range(0, len(texts), batch_size) responses = [] - + client = self.config.get_client() for i in batch_itr: batch = texts[i : i + batch_size] - response = self.client.embeddings( + response = client.embeddings( model=self.config.model_name, normalized=True, encoding_format=ENCODING_FORMAT, @@ -169,21 +176,3 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder): List[float]: Embedding of the query. """ return self._embed([query])[0] - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["mixedbread_ai"], - extras="embed-mixedbreadai", - ) - def create_client(self) -> "MixedbreadAI": - """ - Create the Mixedbread AI client. - - Returns: - MixedbreadAI: Initialized client. 
- """ - from mixedbread_ai.client import MixedbreadAI - - return MixedbreadAI( - api_key=self.config.api_key, - ) diff --git a/unstructured/embed/octoai.py b/unstructured/embed/octoai.py index e4f7fcb38..119a41bc8 100644 --- a/unstructured/embed/octoai.py +++ b/unstructured/embed/octoai.py @@ -2,57 +2,57 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from openai import OpenAI -OCTOAI_BASE_URL = "https://text.octoai.run/v1" - -@dataclass class OctoAiEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) - model_name: str = "thenlper/gte-large" + api_key: SecretStr + model_name: str = Field(default="thenlper/gte-large") + base_url: str = Field(default="https://text.octoai.run/v1") + + @requires_dependencies( + ["openai", "tiktoken"], + extras="embed-octoai", + ) + def get_client(self) -> "OpenAI": + """Creates an OpenAI python client to embed elements. Uses the OpenAI SDK.""" + from openai import OpenAI + + return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url) @dataclass class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder): config: OctoAiEmbeddingConfig # Uses the OpenAI SDK - _client: Optional["OpenAI"] = field(init=False, default=None) _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "OpenAI": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.embed_query("Q") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query("Q") def initialize(self): pass def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - response = self.client.embeddings.create(input=str(query), model=self.config.model_name) + client = self.config.get_client() + response = client.embeddings.create(input=str(query), model=self.config.model_name) return response.data[0].embedding def embed_documents(self, elements: List[Element]) -> List[Element]: @@ -67,14 +67,3 @@ class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["openai", "tiktoken"], - extras="embed-octoai", - ) - def create_client(self) -> "OpenAI": - """Creates an OpenAI python client to embed elements. 
Uses the OpenAI SDK.""" - from openai import OpenAI - - return OpenAI(api_key=self.config.api_key, base_url=OCTOAI_BASE_URL) diff --git a/unstructured/embed/openai.py b/unstructured/embed/openai.py index a2f7d6472..ad97c49d9 100644 --- a/unstructured/embed/openai.py +++ b/unstructured/embed/openai.py @@ -1,58 +1,60 @@ -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, List, Optional +from dataclasses import dataclass +from typing import TYPE_CHECKING, List import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_openai.embeddings import OpenAIEmbeddings -@dataclass class OpenAIEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) - model_name: str = "text-embedding-ada-002" + api_key: SecretStr + model_name: str = Field(default="text-embedding-ada-002") + + @requires_dependencies(["langchain_openai"], extras="openai") + def get_client(self) -> "OpenAIEmbeddings": + """Creates a langchain OpenAI python client to embed elements.""" + from langchain_openai import OpenAIEmbeddings + + openai_client = OpenAIEmbeddings( + openai_api_key=self.api_key.get_secret_value(), + model=self.model_name, # type:ignore + ) + return openai_client @dataclass class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder): config: OpenAIEmbeddingConfig - _client: Optional["OpenAIEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "OpenAIEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("Q") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="Q") def initialize(self): pass def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - return self.client.embed_query(str(query)) + client = self.config.get_client() + return client.embed_query(str(query)) def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -63,15 +65,3 @@ class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies(["langchain_openai"], extras="openai") - def create_client(self) -> "OpenAIEmbeddings": - """Creates a langchain OpenAI python client to embed elements.""" - from langchain_openai 
import OpenAIEmbeddings - - openai_client = OpenAIEmbeddings( - openai_api_key=self.config.api_key, - model=self.config.model_name, # type:ignore - ) - return openai_client diff --git a/unstructured/embed/vertexai.py b/unstructured/embed/vertexai.py index edbc8c2ef..5228ed497 100644 --- a/unstructured/embed/vertexai.py +++ b/unstructured/embed/vertexai.py @@ -1,62 +1,71 @@ # type: ignore import json import os -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import ( Element, ) from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import FileHandler, requires_dependencies if TYPE_CHECKING: from langchain_google_vertexai import VertexAIEmbeddings -@dataclass class VertexAIEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) - model_name: Optional[str] = "textembedding-gecko@001" + api_key: SecretStr + model_name: Optional[str] = Field(default="textembedding-gecko@001") + + def register_application_credentials(self): + application_credentials_path = os.path.join("/tmp", "google-vertex-app-credentials.json") + credentials_file = FileHandler(application_credentials_path) + credentials_file.write_file(json.dumps(json.loads(self.api_key.get_secret_value()))) + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = application_credentials_path + + @requires_dependencies( + ["langchain", "langchain_google_vertexai"], + extras="embed-vertexai", + ) + def get_client(self) -> "VertexAIEmbeddings": + """Creates a Langchain VertexAI python client to embed elements.""" + from langchain_google_vertexai import VertexAIEmbeddings + + self.register_application_credentials() + vertexai_client = VertexAIEmbeddings(model_name=self.model_name) + return vertexai_client @dataclass class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder): config: VertexAIEmbeddingConfig - _client: Optional["VertexAIEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "VertexAIEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("A sample query.") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="A sample query.") def initialize(self): pass def num_of_dimensions(self): - return np.shape(self.exemplary_embedding) + exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) def is_unit_vector(self): - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_query(self, query): - result = self.client.embed_query(str(query)) + client = self.config.get_client() + result = client.embed_query(str(query)) return result def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = 
client.embed_documents([str(e) for e in elements]) elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings) return elements_with_embeddings @@ -67,25 +76,3 @@ class VertexAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @property - def application_credentials_path(self): - return os.path.join("/tmp", "google-vertex-app-credentials.json") - - def register_application_credentials(self): - credentials_file = FileHandler(self.application_credentials_path) - credentials_file.write_file(json.dumps(json.loads(self.config.api_key))) - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.application_credentials_path - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["langchain", "langchain_google_vertexai"], - extras="embed-vertexai", - ) - def create_client(self) -> "VertexAIEmbeddings": - """Creates a Langchain VertexAI python client to embed elements.""" - from langchain_google_vertexai import VertexAIEmbeddings - - self.register_application_credentials() - vertexai_client = VertexAIEmbeddings(model_name=self.config.model_name) - return vertexai_client diff --git a/unstructured/embed/voyageai.py b/unstructured/embed/voyageai.py index 56f98d365..c5dd5b61c 100644 --- a/unstructured/embed/voyageai.py +++ b/unstructured/embed/voyageai.py @@ -1,61 +1,67 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import TYPE_CHECKING, List, Optional import numpy as np +from pydantic import Field, SecretStr from unstructured.documents.elements import Element from unstructured.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import EmbeddingEncoderConnectionError from unstructured.utils import requires_dependencies if TYPE_CHECKING: from langchain_voyageai import VoyageAIEmbeddings -@dataclass class VoyageAIEmbeddingConfig(EmbeddingConfig): - api_key: str = enhanced_field(sensitive=True) + api_key: SecretStr model_name: str - batch_size: Optional[int] = None - truncation: Optional[bool] = None + batch_size: Optional[int] = Field(default=None) + truncation: Optional[bool] = Field(default=None) + + @requires_dependencies( + ["langchain", "langchain_voyageai"], + extras="embed-voyageai", + ) + def get_client(self) -> "VoyageAIEmbeddings": + """Creates a Langchain VoyageAI python client to embed elements.""" + from langchain_voyageai import VoyageAIEmbeddings + + return VoyageAIEmbeddings( + voyage_api_key=self.api_key, + model=self.model_name, + batch_size=self.batch_size, + truncation=self.truncation, + ) @dataclass class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder): config: VoyageAIEmbeddingConfig - _client: Optional["VoyageAIEmbeddings"] = field(init=False, default=None) - _exemplary_embedding: Optional[List[float]] = field(init=False, default=None) - @property - def client(self) -> "VoyageAIEmbeddings": - if self._client is None: - self._client = self.create_client() - return self._client - - @property - def exemplary_embedding(self) -> List[float]: - if self._exemplary_embedding is None: - self._exemplary_embedding = self.client.embed_query("A sample query.") - return self._exemplary_embedding + def get_exemplary_embedding(self) -> List[float]: + return self.embed_query(query="A sample query.") def initialize(self): pass @property def num_of_dimensions(self) -> tuple[int, ...]: - return np.shape(self.exemplary_embedding) + 
exemplary_embedding = self.get_exemplary_embedding() + return np.shape(exemplary_embedding) @property def is_unit_vector(self) -> bool: - return np.isclose(np.linalg.norm(self.exemplary_embedding), 1.0) + exemplary_embedding = self.get_exemplary_embedding() + return np.isclose(np.linalg.norm(exemplary_embedding), 1.0) def embed_documents(self, elements: List[Element]) -> List[Element]: - embeddings = self.client.embed_documents([str(e) for e in elements]) + client = self.config.get_client() + embeddings = client.embed_documents([str(e) for e in elements]) return self._add_embeddings_to_elements(elements, embeddings) def embed_query(self, query: str) -> List[float]: - return self.client.embed_query(query) + client = self.config.get_client() + return client.embed_query(query) @staticmethod def _add_embeddings_to_elements(elements, embeddings) -> List[Element]: @@ -65,19 +71,3 @@ class VoyageAIEmbeddingEncoder(BaseEmbeddingEncoder): element.embeddings = embeddings[i] elements_w_embedding.append(element) return elements - - @EmbeddingEncoderConnectionError.wrap - @requires_dependencies( - ["langchain", "langchain_voyageai"], - extras="embed-voyageai", - ) - def create_client(self) -> "VoyageAIEmbeddings": - """Creates a Langchain VoyageAI python client to embed elements.""" - from langchain_voyageai import VoyageAIEmbeddings - - return VoyageAIEmbeddings( - voyage_api_key=self.config.api_key, - model=self.config.model_name, - batch_size=self.config.batch_size, - truncation=self.config.truncation, - ) diff --git a/unstructured/ingest/README.md b/unstructured/ingest/README.md deleted file mode 100644 index f7291aa5a..000000000 --- a/unstructured/ingest/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Ingest -![Project unmaintained](https://img.shields.io/badge/project-unmaintained-red.svg) - -Project has been moved to: [Unstructured Ingest](https://github.com/Unstructured-IO/unstructured-ingest) - -This python module will be removed from this repo in the near future. diff --git a/unstructured/ingest/__init__.py b/unstructured/ingest/__init__.py deleted file mode 100644 index cae55db4a..000000000 --- a/unstructured/ingest/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from __future__ import annotations - -import warnings - -warnings.warn( - "unstructured.ingest will be removed in a future version. 
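
Note that this patch deletes `unstructured.ingest` outright rather than leaving the deprecation shim in place, so the warning in this hunk never fires on the new version; imports fail instead. A quick way to confirm the new behavior, assuming the patch is applied:

```python
# After this patch, importing the removed module raises rather than warning.
try:
    import unstructured.ingest  # noqa: F401
except ModuleNotFoundError:
    print("ingest now lives in the separate unstructured-ingest package")
```
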
" - "Functionality moved to the unstructured-ingest project.", - DeprecationWarning, - stacklevel=2, -) diff --git a/unstructured/ingest/cli/__init__.py b/unstructured/ingest/cli/__init__.py deleted file mode 100644 index f3490ae22..000000000 --- a/unstructured/ingest/cli/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -import typing as t - -import click - -from unstructured.ingest.cli.cmds import base_dest_cmd_fns, base_src_cmd_fns - -src: t.List[click.Group] = [v().get_src_cmd() for v in base_src_cmd_fns] - -dest: t.List[click.Command] = [v().get_dest_cmd() for v in base_dest_cmd_fns] - -__all__ = [ - "src", - "dest", -] diff --git a/unstructured/ingest/cli/base/__init__.py b/unstructured/ingest/cli/base/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/cli/base/cmd.py b/unstructured/ingest/cli/base/cmd.py deleted file mode 100644 index f02a81424..000000000 --- a/unstructured/ingest/cli/base/cmd.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from abc import ABC -from dataclasses import dataclass, field - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.interfaces import BaseConfig - - -@dataclass -class BaseCmd(ABC): - cmd_name: str - cli_config: t.Optional[t.Type[BaseConfig]] = None - additional_cli_options: t.List[t.Type[CliConfig]] = field(default_factory=list) - addition_configs: t.Dict[str, t.Type[BaseConfig]] = field(default_factory=dict) - is_fsspec: bool = False - - @property - def cmd_name_key(self): - return self.cmd_name.replace("-", "_") diff --git a/unstructured/ingest/cli/base/dest.py b/unstructured/ingest/cli/base/dest.py deleted file mode 100644 index 4b3d62739..000000000 --- a/unstructured/ingest/cli/base/dest.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.cmd import BaseCmd -from unstructured.ingest.cli.cmd_factory import get_src_cmd -from unstructured.ingest.cli.common import ( - log_options, -) -from unstructured.ingest.cli.interfaces import BaseConfig, CliFilesStorageConfig -from unstructured.ingest.cli.utils import ( - add_options, - conform_click_options, - extract_config, - extract_configs, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner.writers import writer_map - - -@dataclass -class BaseDestCmd(BaseCmd): - write_config: t.Optional[t.Type[BaseConfig]] = None - - def get_dest_runner(self, source_cmd: str, options: dict, parent_options: dict): - src_cmd_fn = get_src_cmd(cmd_name=source_cmd) - src_cmd = src_cmd_fn() - runner = src_cmd.get_source_runner(options=parent_options) - addition_configs = self.addition_configs - if "connector_config" not in addition_configs: - addition_configs["connector_config"] = self.cli_config - if self.write_config: - addition_configs["write_config"] = self.write_config - configs = extract_configs( - options, - validate=[self.cli_config] if self.cli_config else None, - extras=addition_configs, - add_defaults=False, - ) - writer_cls = writer_map[self.cmd_name_key] - writer = writer_cls(**configs) # type: ignore - runner.writer = writer - runner.writer_kwargs = options - return runner - - def check_dest_options(self, options: dict): - extract_config(flat_data=options, config=self.cli_config) - - def dest(self, ctx: click.Context, **options): - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise 
click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - parent_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - verbose = parent_options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(parent_options, verbose=verbose) - log_options(options, verbose=verbose) - try: - self.check_dest_options(options=options) - runner = self.get_dest_runner( - source_cmd=source_cmd, - options=options, - parent_options=parent_options, - ) - runner.run(**parent_options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - def get_dest_cmd(self) -> click.Command: - # Dynamically create the command without the use of click decorators - fn = self.dest - fn = click.pass_context(fn) - cmd: click.Group = click.command(fn) - cmd.name = self.cmd_name - cmd.invoke_without_command = True - options = [self.cli_config] if self.cli_config else [] - options += self.additional_cli_options - if self.is_fsspec and CliFilesStorageConfig not in options: - options.append(CliFilesStorageConfig) - add_options(cmd, extras=options, is_src=False) - return cmd diff --git a/unstructured/ingest/cli/base/src.py b/unstructured/ingest/cli/base/src.py deleted file mode 100644 index 70acbced4..000000000 --- a/unstructured/ingest/cli/base/src.py +++ /dev/null @@ -1,57 +0,0 @@ -import logging -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.cmd import BaseCmd -from unstructured.ingest.cli.common import ( - log_options, -) -from unstructured.ingest.cli.interfaces import CliFilesStorageConfig -from unstructured.ingest.cli.utils import Group, add_options, conform_click_options, extract_configs -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.runner import runner_map - - -@dataclass -class BaseSrcCmd(BaseCmd): - def get_source_runner(self, options: dict): - addition_configs = self.addition_configs - if "connector_config" not in addition_configs: - addition_configs["connector_config"] = self.cli_config - configs = extract_configs( - options, - validate=[self.cli_config] if self.cli_config else None, - extras=addition_configs, - ) - runner = runner_map[self.cmd_name_key] - return runner(**configs) # type: ignore - - def src(self, ctx: click.Context, **options): - if ctx.invoked_subcommand: - return - - conform_click_options(options) - verbose = options.get("verbose", False) - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) - log_options(options, verbose=verbose) - try: - runner = self.get_source_runner(options=options) - runner.run(**options) - except Exception as e: - logger.error(e, exc_info=True) - raise click.ClickException(str(e)) from e - - def get_src_cmd(self) -> click.Group: - # Dynamically create the command without the use of click decorators - fn = self.src - fn = click.pass_context(fn) - cmd: click.Group = click.group(fn, cls=Group) - cmd.name = self.cmd_name - cmd.invoke_without_command = True - extra_options = [self.cli_config] if self.cli_config else [] - extra_options += self.additional_cli_options - if self.is_fsspec and CliFilesStorageConfig not in extra_options: - extra_options.append(CliFilesStorageConfig) - add_options(cmd, extras=extra_options) - return cmd diff --git a/unstructured/ingest/cli/cli.py b/unstructured/ingest/cli/cli.py deleted file mode 100644 index fa7c3008e..000000000 --- 
a/unstructured/ingest/cli/cli.py +++ /dev/null @@ -1,32 +0,0 @@ -import click - -from unstructured.ingest.cli import dest, src -from unstructured.ingest.v2.cli.cmds import dest as dest_v2 -from unstructured.ingest.v2.cli.cmds import src as src_v2 - - -@click.group() -def ingest(): - pass - - -def get_cmd() -> click.Command: - """Construct and return a Click command object representing the main command for the CLI. - - This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those - to the main command as nested subcommands. - """ - cmd = ingest - src_dict = {s.name: s for s in src} - dest_dict = {d.name: d for d in dest} - for s in src_v2: - src_dict[s.name] = s - for d in dest_v2: - dest_dict[d.name] = d - # Add all subcommands - for src_subcommand in src_dict.values(): - # Add all destination subcommands - for dest_subcommand in dest_dict.values(): - src_subcommand.add_command(dest_subcommand) - cmd.add_command(src_subcommand) - return cmd diff --git a/unstructured/ingest/cli/cmd_factory.py b/unstructured/ingest/cli/cmd_factory.py deleted file mode 100644 index 3260828cb..000000000 --- a/unstructured/ingest/cli/cmd_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -import typing as t - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.cmds import base_src_cmd_fns - - -def get_src_cmd_map() -> t.Dict[str, t.Callable[[], BaseSrcCmd]]: - return {b().cmd_name_key: b for b in base_src_cmd_fns} - - -def get_src_cmd(cmd_name: str) -> t.Callable[[], BaseSrcCmd]: - return get_src_cmd_map()[cmd_name] diff --git a/unstructured/ingest/cli/cmds/__init__.py b/unstructured/ingest/cli/cmds/__init__.py deleted file mode 100644 index f75ee797e..000000000 --- a/unstructured/ingest/cli/cmds/__init__.py +++ /dev/null @@ -1,145 +0,0 @@ -from __future__ import annotations - -import collections -import typing as t - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.cmds.fsspec.sftp import get_base_src_cmd as sftp_base_src_cmd - -from .airtable import get_base_src_cmd as airtable_base_src_cmd -from .astradb import get_base_dest_cmd as astradb_base_dest_cmd -from .astradb import get_base_src_cmd as astradb_base_src_cmd -from .azure_cognitive_search import get_base_dest_cmd as azure_cognitive_search_base_dest_cmd -from .biomed import get_base_src_cmd as biomed_base_src_cmd -from .chroma import get_base_dest_cmd as chroma_base_dest_cmd -from .clarifai import get_base_dest_cmd as clarifai_base_dest_cmd -from .confluence import get_base_src_cmd as confluence_base_src_cmd -from .databricks_volumes import get_base_dest_cmd as databricks_volumes_dest_cmd -from .delta_table import get_base_dest_cmd as delta_table_dest_cmd -from .delta_table import get_base_src_cmd as delta_table_base_src_cmd -from .discord import get_base_src_cmd as discord_base_src_cmd -from .elasticsearch import get_base_dest_cmd as elasticsearch_base_dest_cmd -from .elasticsearch import get_base_src_cmd as elasticsearch_base_src_cmd -from .fsspec.azure import get_base_dest_cmd as azure_base_dest_cmd -from .fsspec.azure import get_base_src_cmd as azure_base_src_cmd -from .fsspec.box import get_base_dest_cmd as box_base_dest_cmd -from .fsspec.box import get_base_src_cmd as box_base_src_cmd -from .fsspec.dropbox import get_base_dest_cmd as dropbox_base_dest_cmd -from .fsspec.dropbox import get_base_src_cmd as dropbox_base_src_cmd -from .fsspec.fsspec import get_base_dest_cmd as fsspec_base_dest_cmd -from .fsspec.fsspec import get_base_src_cmd as fsspec_base_src_cmd 
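
The registry module deleted here wires every connector import into `base_src_cmd_fns`/`base_dest_cmd_fns` and, further down in the same file, guards against duplicate command names with a `collections.Counter` pass. A standalone sketch of that guard, with toy values:

```python
# Standalone version of the duplicate-name check used at the bottom of this
# deleted registry module; the command names here are toy values.
import collections

cmd_names = ["s3", "gcs", "s3"]
duplicates = [name for name, count in collections.Counter(cmd_names).items() if count > 1]
if duplicates:
    raise ValueError(
        "multiple base src commands defined with the same names: {}".format(", ".join(duplicates))
    )
```
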
-from .fsspec.gcs import get_base_dest_cmd as gcs_base_dest_cmd -from .fsspec.gcs import get_base_src_cmd as gcs_base_src_cmd -from .fsspec.s3 import get_base_dest_cmd as s3_base_dest_cmd -from .fsspec.s3 import get_base_src_cmd as s3_base_src_cmd -from .github import get_base_src_cmd as github_base_src_cmd -from .gitlab import get_base_src_cmd as gitlab_base_src_cmd -from .google_drive import get_base_src_cmd as google_drive_base_src_cmd -from .hubspot import get_base_src_cmd as hubspot_base_src_cmd -from .jira import get_base_src_cmd as jira_base_src_cmd -from .kafka import get_base_dest_cmd as kafka_base_dest_cmd -from .kafka import get_base_src_cmd as kafka_base_src_cmd -from .local import get_base_src_cmd as local_base_src_cmd -from .mongodb import get_base_dest_cmd as mongo_base_dest_cmd -from .mongodb import get_base_src_cmd as mongodb_base_src_cmd -from .notion import get_base_src_cmd as notion_base_src_cmd -from .onedrive import get_base_src_cmd as onedrive_base_src_cmd -from .opensearch import get_base_dest_cmd as opensearch_base_dest_cmd -from .opensearch import get_base_src_cmd as opensearch_base_src_cmd -from .outlook import get_base_src_cmd as outlook_base_src_cmd -from .pinecone import get_base_dest_cmd as pinecone_base_dest_cmd -from .qdrant import get_base_dest_cmd as qdrant_base_dest_cmd -from .reddit import get_base_src_cmd as reddit_base_src_cmd -from .salesforce import get_base_src_cmd as salesforce_base_src_cmd -from .sharepoint import get_base_src_cmd as sharepoint_base_src_cmd -from .slack import get_base_src_cmd as slack_base_src_cmd -from .sql import get_base_dest_cmd as sql_base_dest_cmd -from .vectara import get_base_dest_cmd as vectara_base_dest_cmd -from .weaviate import get_base_dest_cmd as weaviate_dest_cmd -from .wikipedia import get_base_src_cmd as wikipedia_base_src_cmd - -if t.TYPE_CHECKING: - from unstructured.ingest.cli.base.dest import BaseDestCmd - -base_src_cmd_fns: t.List[t.Callable[[], BaseSrcCmd]] = [ - airtable_base_src_cmd, - astradb_base_src_cmd, - azure_base_src_cmd, - biomed_base_src_cmd, - box_base_src_cmd, - confluence_base_src_cmd, - delta_table_base_src_cmd, - discord_base_src_cmd, - dropbox_base_src_cmd, - elasticsearch_base_src_cmd, - fsspec_base_src_cmd, - gcs_base_src_cmd, - github_base_src_cmd, - gitlab_base_src_cmd, - google_drive_base_src_cmd, - hubspot_base_src_cmd, - jira_base_src_cmd, - kafka_base_src_cmd, - local_base_src_cmd, - mongodb_base_src_cmd, - notion_base_src_cmd, - onedrive_base_src_cmd, - opensearch_base_src_cmd, - outlook_base_src_cmd, - reddit_base_src_cmd, - salesforce_base_src_cmd, - sftp_base_src_cmd, - sharepoint_base_src_cmd, - slack_base_src_cmd, - s3_base_src_cmd, - wikipedia_base_src_cmd, -] - -# Make sure there are not overlapping names -src_cmd_names = [b().cmd_name for b in base_src_cmd_fns] -src_duplicates = [item for item, count in collections.Counter(src_cmd_names).items() if count > 1] -if src_duplicates: - raise ValueError( - "multiple base src commands defined with the same names: {}".format( - ", ".join(src_duplicates), - ), - ) - -base_dest_cmd_fns: t.List[t.Callable[[], "BaseDestCmd"]] = [ - astradb_base_dest_cmd, - azure_base_dest_cmd, - box_base_dest_cmd, - chroma_base_dest_cmd, - clarifai_base_dest_cmd, - databricks_volumes_dest_cmd, - dropbox_base_dest_cmd, - elasticsearch_base_dest_cmd, - fsspec_base_dest_cmd, - gcs_base_dest_cmd, - kafka_base_dest_cmd, - s3_base_dest_cmd, - azure_cognitive_search_base_dest_cmd, - delta_table_dest_cmd, - sql_base_dest_cmd, - weaviate_dest_cmd, - 
mongo_base_dest_cmd, - pinecone_base_dest_cmd, - qdrant_base_dest_cmd, - opensearch_base_dest_cmd, - vectara_base_dest_cmd, -] - -# Make sure there are not overlapping names -dest_cmd_names = [b().cmd_name for b in base_dest_cmd_fns] -dest_duplicates = [item for item, count in collections.Counter(dest_cmd_names).items() if count > 1] -if dest_duplicates: - raise ValueError( - "multiple base dest commands defined with the same names: {}".format( - ", ".join(dest_duplicates), - ), - ) - -__all__ = [ - "base_src_cmd_fns", - "base_dest_cmd_fns", -] diff --git a/unstructured/ingest/cli/cmds/airtable.py b/unstructured/ingest/cli/cmds/airtable.py deleted file mode 100644 index c7462a707..000000000 --- a/unstructured/ingest/cli/cmds/airtable.py +++ /dev/null @@ -1,69 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.airtable import SimpleAirtableConfig - - -@dataclass -class AirtableCliConfig(SimpleAirtableConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--personal-access-token"], - default=None, - help="Personal access token to authenticate into Airtable. Check: " - "https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens " - "for more info", - ), - click.Option( - ["--list-of-paths"], - default=None, - help=""" - A list of paths that specify the locations to ingest data from within Airtable. - - If this argument is not set, the connector ingests all tables within each and every base. - --list-of-paths: path1 path2 path3 …. - path: base_id/table_id(optional)/view_id(optional)/ - - To obtain (base, table, view) ids in bulk, check: - https://airtable.com/developers/web/api/list-bases (base ids) - https://airtable.com/developers/web/api/get-base-schema (table and view ids) - https://pyairtable.readthedocs.io/en/latest/metadata.html (base, table and view ids) - - To obtain specific ids from Airtable UI, go to your workspace, and copy any - relevant id from the URL structure: - https://airtable.com/appAbcDeF1ghijKlm/tblABcdEfG1HIJkLm/viwABCDEfg6hijKLM - appAbcDeF1ghijKlm -> base_id - tblABcdEfG1HIJkLm -> table_id - viwABCDEfg6hijKLM -> view_id - - You can also check: https://support.airtable.com/docs/finding-airtable-ids - - Here is an example for one --list-of-paths: - base1/ → gets the entirety of all tables inside base1 - base1/table1 → gets all rows and columns within table1 in base1 - base1/table1/view1 → gets the rows and columns that are - visible in view1 for the table1 in base1 - - Examples to invalid airtable_paths: - table1 → has to mention base to be valid - base1/view1 → has to mention table to be valid - """, - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="airtable", - cli_config=AirtableCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/astradb.py b/unstructured/ingest/cli/cmds/astradb.py deleted file mode 100644 index b7be8f56c..000000000 --- a/unstructured/ingest/cli/cmds/astradb.py +++ /dev/null @@ -1,99 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig, Dict -from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig - - -@dataclass -class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig): - @staticmethod - 
def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Astra DB Token with access to the database.", - envvar="ASTRA_DB_APPLICATION_TOKEN", - show_envvar=True, - ), - click.Option( - ["--api-endpoint"], - required=True, - type=str, - help="The API endpoint for the Astra DB.", - envvar="ASTRA_DB_API_ENDPOINT", - show_envvar=True, - ), - click.Option( - ["--collection-name"], - required=False, - type=str, - help="The name of the Astra DB collection. " - "Note that the collection name must only include letters, " - "numbers, and underscores.", - ), - click.Option( - ["--namespace"], - required=False, - default=None, - type=str, - help="The Astra DB connection namespace.", - ), - ] - return options - - -@dataclass -class AstraDBCliWriteConfig(AstraDBWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--embedding-dimension"], - required=True, - default=384, - type=int, - help="The dimensionality of the embeddings", - ), - click.Option( - ["--requested-indexing-policy"], - required=False, - default=None, - type=Dict(), - help="The indexing policy to use for the collection." - 'example: \'{"deny": ["metadata"]}\' ', - ), - click.Option( - ["--batch-size"], - default=20, - type=int, - help="Number of records per batch", - ), - ] - return options - - -def get_base_src_cmd(): - from unstructured.ingest.cli.base.src import BaseSrcCmd - - cmd_cls = BaseSrcCmd( - cmd_name="astradb", - cli_config=AstraDBCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="astradb", - cli_config=AstraDBCliConfig, - additional_cli_options=[AstraDBCliWriteConfig], - write_config=AstraDBWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/azure_cognitive_search.py b/unstructured/ingest/cli/cmds/azure_cognitive_search.py deleted file mode 100644 index 029519fb8..000000000 --- a/unstructured/ingest/cli/cmds/azure_cognitive_search.py +++ /dev/null @@ -1,65 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.azure_cognitive_search import ( - AzureCognitiveSearchWriteConfig, - SimpleAzureCognitiveSearchStorageConfig, -) - - -@dataclass -class AzureCognitiveSearchCliConfig(SimpleAzureCognitiveSearchStorageConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--key"], - required=True, - type=str, - help="Key credential used for authenticating to an Azure service.", - envvar="AZURE_SEARCH_API_KEY", - show_envvar=True, - ), - click.Option( - ["--endpoint"], - required=True, - type=str, - help="The URL endpoint of an Azure search service. 
" - "In the form of https://{{service_name}}.search.windows.net", - envvar="AZURE_SEARCH_ENDPOINT", - show_envvar=True, - ), - ] - return options - - -@dataclass -class AzureCognitiveSearchCliWriteConfig(AzureCognitiveSearchWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--index"], - required=True, - type=str, - help="The name of the index to connect to", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="azure-cognitive-search", - cli_config=AzureCognitiveSearchCliConfig, - additional_cli_options=[AzureCognitiveSearchCliWriteConfig], - write_config=AzureCognitiveSearchCliWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/biomed.py b/unstructured/ingest/cli/cmds/biomed.py deleted file mode 100644 index bafe403f3..000000000 --- a/unstructured/ingest/cli/cmds/biomed.py +++ /dev/null @@ -1,52 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.biomed import SimpleBiomedConfig - - -@dataclass -class BiomedCliConfig(SimpleBiomedConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-id"], - default=None, - help="ID parameter for OA Web Service API.", - ), - click.Option( - ["--api-from"], - default=None, - help="From parameter for OA Web Service API.", - ), - click.Option( - ["--api-until"], - default=None, - help="Until parameter for OA Web Service API.", - ), - click.Option( - ["--path"], - default=None, - help="PMC Open Access FTP Directory Path.", - ), - click.Option( - ["--max-request-time"], - default=45, - help="(In seconds) Max request time to OA Web Service API.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="biomed", - cli_config=BiomedCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/chroma.py b/unstructured/ingest/cli/cmds/chroma.py deleted file mode 100644 index c4a5cbcce..000000000 --- a/unstructured/ingest/cli/cmds/chroma.py +++ /dev/null @@ -1,104 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig, Dict -from unstructured.ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig - - -@dataclass -class ChromaCliConfig(SimpleChromaConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--path"], - required=False, - type=str, - help="Location where Chroma is persisted," "if not connecting via http.", - ), - click.Option( - ["--settings"], - required=False, - type=Dict(), - help="A dictionary of settings to communicate with the chroma server." - 'example: \'{"persist_directory":"./chroma-persist"}\' ', - ), - click.Option( - ["--tenant"], - required=False, - default="default_tenant", - type=str, - help="The tenant to use for this client. Chroma defaults to 'default_tenant'.", - ), - click.Option( - ["--database"], - required=False, - default="default_database", - type=str, - help="The database to use for this client." 
- "Chroma defaults to 'default_database'.", - ), - click.Option( - ["--host"], - required=False, - type=str, - help="The hostname of the Chroma server.", - ), - click.Option( - ["--port"], - required=False, - type=int, - help="The port of the Chroma server.", - ), - click.Option( - ["--ssl"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to use SSL to connect to the Chroma server.", - ), - click.Option( - ["--headers"], - required=False, - type=Dict(), - help="A dictionary of headers to send to the Chroma server." - 'example: \'{"Authorization":"Basic()"}\' ', - ), - click.Option( - ["--collection-name"], - required=True, - type=str, - help="The name of the Chroma collection to write into.", - ), - ] - return options - - -@dataclass -class ChromaCliWriteConfig(ChromaWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="chroma", - cli_config=ChromaCliConfig, - additional_cli_options=[ChromaCliWriteConfig], - write_config=ChromaWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/clarifai.py b/unstructured/ingest/cli/cmds/clarifai.py deleted file mode 100644 index 23178d172..000000000 --- a/unstructured/ingest/cli/cmds/clarifai.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.clarifai import ( - ClarifaiWriteConfig, - SimpleClarifaiConfig, -) - -CMD_NAME = "clarifai" - - -@dataclass -class ClarifaiCliConfig(SimpleClarifaiConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-key"], - required=True, - type=str, - help="The CLARIFAI_PAT of the user to access clarifai platform apps and models", - envvar="CLARIFAI_PAT", - show_envvar=True, - ), - click.Option( - ["--app-id"], - required=True, - type=str, - help="Clarifai app name/id", - ), - click.Option( - ["--user-id"], - required=True, - type=str, - help="Clarifai User name/ID", - ), - click.Option( - ["--dataset-id"], type=str, default=None, help="Clarifai App Dataset ID (optional)" - ), - ] - return options - - -@dataclass -class ClarifaiCliWriteConfig(ClarifaiWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.option]: - options = [ - click.Option( - ["--batch-size"], - type=int, - default=50, - help="No of inputs upload per batch", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=ClarifaiCliConfig, - additional_cli_options=[ClarifaiCliWriteConfig], - write_config=ClarifaiWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/confluence.py b/unstructured/ingest/cli/cmds/confluence.py deleted file mode 100644 index 1fc43d2ae..000000000 --- a/unstructured/ingest/cli/cmds/confluence.py +++ /dev/null @@ -1,69 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.confluence import 
SimpleConfluenceConfig - - -@dataclass -class ConfluenceCliConfig(SimpleConfluenceConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-token"], - required=True, - help="API Token to authenticate into Confluence Cloud. " - "Check " - "https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ " - "for more info.", - ), - click.Option( - ["--url"], - required=True, - help='URL to Confluence Cloud, e.g. "unstructured-ingest-test.atlassian.net"', - ), - click.Option( - ["--user-email"], - required=True, - help="Email to authenticate into Confluence Cloud", - ), - click.Option( - ["--spaces"], - default=None, - type=DelimitedString(), - help="A list of confluence space ids to be fetched. From each fetched space, " - "--num-of-docs-from-each-space number of docs will be ingested. " - "--spaces and --num-of-spaces cannot be used at the same time", - ), - click.Option( - ["--max-num-of-docs-from-each-space"], - default=100, - help="Number of documents to be aimed to be ingested from each fetched " - "confluence space. If any space has fewer documents, all the documents from " - "that space will be ingested. Documents are not necessarily " - "ingested in order of creation date.", - ), - click.Option( - ["--max-num-of-spaces"], - default=500, - help="Number of confluence space ids to be fetched. From each fetched space, " - "--num-of-docs-from-each-space number of docs will be ingested. " - "--spaces and --num-of-spaces cannot be used at the same time", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="confluence", - cli_config=ConfluenceCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/databricks_volumes.py b/unstructured/ingest/cli/cmds/databricks_volumes.py deleted file mode 100644 index faea5e0d4..000000000 --- a/unstructured/ingest/cli/cmds/databricks_volumes.py +++ /dev/null @@ -1,163 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.databricks_volumes import ( - DatabricksVolumesWriteConfig, - SimpleDatabricksVolumesConfig, -) - -CMD_NAME = "databricks-volumes" - - -@dataclass -class DatabricksVolumesCliConfig(SimpleDatabricksVolumesConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--host"], - type=str, - default=None, - help="The Databricks host URL for either the " - "Databricks workspace endpoint or the " - "Databricks accounts endpoint.", - ), - click.Option( - ["--account-id"], - type=str, - default=None, - help="The Databricks account ID for the Databricks " - "accounts endpoint. Only has effect when Host is " - "either https://accounts.cloud.databricks.com/ (AWS), " - "https://accounts.azuredatabricks.net/ (Azure), " - "or https://accounts.gcp.databricks.com/ (GCP).", - ), - click.Option( - ["--username"], - type=str, - default=None, - help="The Databricks username part of basic authentication. " - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option( - ["--password"], - type=str, - default=None, - help="The Databricks password part of basic authentication. 
" - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option(["--client-id"], type=str, default=None), - click.Option(["--client-secret"], type=str, default=None), - click.Option( - ["--token"], - type=str, - default=None, - help="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or " - "Azure Active Directory (Azure AD) token (Azure).", - ), - click.Option( - ["--azure-workspace-resource-id"], - type=str, - default=None, - help="The Azure Resource Manager ID for the Azure Databricks workspace, " - "which is exchanged for a Databricks host URL.", - ), - click.Option( - ["--azure-client-secret"], - type=str, - default=None, - help="The Azure AD service principal’s client secret.", - ), - click.Option( - ["--azure-client-id"], - type=str, - default=None, - help="The Azure AD service principal’s application ID.", - ), - click.Option( - ["--azure-tenant-id"], - type=str, - default=None, - help="The Azure AD service principal’s tenant ID.", - ), - click.Option( - ["--azure-environment"], - type=str, - default=None, - help="The Azure environment type (such as Public, UsGov, China, and Germany) for a " - "specific set of API endpoints. Defaults to PUBLIC.", - ), - click.Option( - ["--auth-type"], - type=str, - default=None, - help="When multiple auth attributes are available in the " - "environment, use the auth type specified by this " - "argument. This argument also holds the currently " - "selected auth.", - ), - click.Option(["--cluster-id"], type=str, default=None), - click.Option(["--google-credentials"], type=str, default=None), - click.Option(["--google-service-account"], type=str, default=None), - ] - return options - - -@dataclass -class DatabricksVolumesCliWriteConfig(DatabricksVolumesWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--volume"], type=str, required=True, help="Name of volume in the Unity Catalog" - ), - click.Option( - ["--catalog"], - type=str, - required=True, - help="Name of the catalog in the Databricks Unity Catalog service", - ), - click.Option( - ["--volume-path"], - type=str, - required=False, - default=None, - help="Optional path within the volume to write to", - ), - click.Option( - ["--overwrite"], - type=bool, - is_flag=True, - help="If true, an existing file will be overwritten.", - ), - click.Option( - ["--encoding"], - type=str, - required=True, - default="utf-8", - help="Encoding applied to the data when written to the volume", - ), - click.Option( - ["--schema"], - type=str, - required=True, - default="default", - help="Schema associated with the volume to write to in the Unity Catalog service", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=DatabricksVolumesCliConfig, - additional_cli_options=[DatabricksVolumesCliWriteConfig], - write_config=DatabricksVolumesWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/delta_table.py b/unstructured/ingest/cli/cmds/delta_table.py deleted file mode 100644 index 8504c09b0..000000000 --- a/unstructured/ingest/cli/cmds/delta_table.py +++ /dev/null @@ -1,94 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, Dict -from unstructured.ingest.connector.delta_table import DeltaTableWriteConfig, SimpleDeltaTableConfig - 
-CMD_NAME = "delta-table" - - -@dataclass -class DeltaTableCliConfig(SimpleDeltaTableConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--table-uri"], - required=True, - help="the path of the DeltaTable", - ), - click.Option( - ["--version"], - default=None, - type=int, - help="version of the DeltaTable", - ), - click.Option( - ["--storage_options"], - required=False, - type=Dict(), - default=None, - help="a dictionary of the options to use for the storage backend, " - "passed in as a json string", - ), - click.Option( - ["--without-files"], - is_flag=True, - default=False, - help="If set, will load table without tracking files.", - ), - ] - return options - - -@dataclass -class DeltaTableCliWriteConfig(DeltaTableWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--overwrite-schema"], - is_flag=True, - default=False, - help="Flag to overwrite schema of destination table", - ), - click.Option( - ["--drop-empty-cols"], - is_flag=True, - default=False, - help="Flag to drop any columns that have no content", - ), - click.Option( - ["--mode"], - default="error", - type=click.Choice(["error", "append", "overwrite", "ignore"]), - help="How to handle existing data. Default is to error if table already exists. " - "If 'append', will add new data. " - "If 'overwrite', will replace table with new data. " - "If 'ignore', will not write anything if table already exists.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=DeltaTableCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=DeltaTableCliConfig, - additional_cli_options=[DeltaTableCliWriteConfig], - write_config=DeltaTableWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/discord.py b/unstructured/ingest/cli/cmds/discord.py deleted file mode 100644 index 115745a6a..000000000 --- a/unstructured/ingest/cli/cmds/discord.py +++ /dev/null @@ -1,47 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.discord import SimpleDiscordConfig - - -@dataclass -class DiscordCliConfig(SimpleDiscordConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - help="Bot token used to access Discord API, must have " - "READ_MESSAGE_HISTORY scope for the bot user", - ), - click.Option( - ["--channels"], - required=True, - type=DelimitedString(), - help="Comma-delimited list of discord channel ids to ingest from.", - ), - click.Option( - ["--period"], - default=None, - type=click.IntRange(0), - help="Number of days to go back in the history of " - "discord channels, must be a number", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="discord", - cli_config=DiscordCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/elasticsearch.py b/unstructured/ingest/cli/cmds/elasticsearch.py deleted file mode 100644 index 58e3ec4d6..000000000 --- a/unstructured/ingest/cli/cmds/elasticsearch.py +++ /dev/null @@ -1,133 +0,0 @@ -import typing as t -from 
dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchWriteConfig, - SimpleElasticsearchConfig, -) - -CMD_NAME = "elasticsearch" - - -@dataclass -class ElasticsearchCliConfig(SimpleElasticsearchConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the Elasticsearch index to pull data from, or upload data to.", - ), - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the Elasticsearch hosts to connect to, e.g. "http://localhost:9200"', - ), - click.Option( - ["--fields"], - type=DelimitedString(), - default=[], - help="If provided, will limit the fields returned by Elasticsearch " - "to this comma-delimited list", - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth or connecting to a cloud instance", - ), - click.Option( - ["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud" - ), - click.Option( - ["--es-api-key"], type=str, default=None, help="api key used for authentication" - ), - click.Option( - ["--api-key-id"], - type=str, - default=None, - help="id associated with api key used for authentication: " - "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html", # noqa: E501 - ), - click.Option( - ["--bearer-auth"], - type=str, - default=None, - help="bearer token used for HTTP bearer authentication", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - ), - click.Option( - ["--ssl-assert-fingerprint"], - type=str, - default=None, - help="SHA256 fingerprint value", - ), - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -@dataclass -class ElasticsearchCliWriteConfig(ElasticsearchWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size-bytes"], - required=False, - default=15_000_000, - type=int, - help="Size limit (in bytes) for each batch of items to be uploaded. 
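The `DelimitedString()` type used by `--hosts` and `--fields` above, and by many of the connectors below, evidently turns a comma-delimited value into a list, optionally restricted to a set of choices (see the salesforce and hubspot options later in this diff). A sketch under those assumptions, again not the removed implementation:

```python
import typing as t

import click


class DelimitedString(click.ParamType):
    """Split 'a,b,c' into ['a', 'b', 'c'], optionally validating against choices."""

    name = "delimited-string"

    def __init__(self, delimiter: str = ",", choices: t.Optional[t.Iterable[str]] = None):
        self.delimiter = delimiter
        self.choices = set(choices) if choices else None

    def convert(self, value, param, ctx):
        if isinstance(value, list):
            # Already converted (e.g. a list default value).
            return value
        items = [v.strip() for v in value.split(self.delimiter) if v.strip()]
        if self.choices:
            bad = [v for v in items if v not in self.choices]
            if bad:
                self.fail(f"{bad} not in allowed values {sorted(self.choices)}", param, ctx)
        return items
```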
Check" - " https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html" - "#_how_big_is_too_big for more information.", - ), - click.Option( - ["--num-processes"], - required=False, - default=1, - type=int, - help="Number of processes to be used while uploading content", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="elasticsearch", - cli_config=ElasticsearchCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="elasticsearch", - cli_config=ElasticsearchCliConfig, - additional_cli_options=[ElasticsearchCliWriteConfig], - addition_configs={ - "connector_config": SimpleElasticsearchConfig, - "write_config": ElasticsearchCliWriteConfig, - }, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/__init__.py b/unstructured/ingest/cli/cmds/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/cli/cmds/fsspec/azure.py b/unstructured/ingest/cli/cmds/fsspec/azure.py deleted file mode 100644 index 0d5f04344..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/azure.py +++ /dev/null @@ -1,94 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.azure import ( - AzureWriteConfig, - SimpleAzureBlobStorageConfig, -) - -CMD_NAME = "azure" - - -@dataclass -class AzureCliConfig(SimpleAzureBlobStorageConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--account-key"], - default=None, - help="The storage account key. This is used for shared key " - "authentication. If any of account key, sas token or " - "client_id are not specified, anonymous access will be used.", - ), - click.Option( - ["--account-name"], - default=None, - help="The storage account name. This is used to authenticate " - "requests signed with an account key and to construct " - "the storage endpoint. It is required unless a connection " - "string is given, or if a custom domain is used with " - "anonymous authentication.", - ), - click.Option( - ["--connection-string"], - default=None, - help="If specified, this will override all other parameters. See " - "http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ " # noqa: E501 - "for the connection string format.", - ), - click.Option( - ["--sas_token"], - default=None, - help="A shared access signature token to use to authenticate " - "requests instead of the account key. If account key and " - "sas token are both specified, account key will be used " - "to sign. 
If any of account key, sas token or client_id " - "are not specified, anonymous access will be used.", - ), - ] - return options - - -@dataclass -class AzureCliWriteConfig(AzureWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--overwrite"], - is_flag=True, - default=False, - show_default=True, - help="If set, will overwrite content if content already exists", - ) - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=AzureCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=AzureCliConfig, - write_config=AzureCliWriteConfig, - is_fsspec=True, - additional_cli_options=[AzureCliWriteConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/box.py b/unstructured/ingest/cli/cmds/fsspec/box.py deleted file mode 100644 index 0d7976350..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/box.py +++ /dev/null @@ -1,48 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig - -CMD_NAME = "box" - - -@dataclass -class BoxCliConfig(SimpleBoxConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--box-app-config"], - default=None, - type=click.Path(), - help="Path to Box app credentials as json file.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=BoxCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=BoxCliConfig, - write_config=BoxWriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/dropbox.py b/unstructured/ingest/cli/cmds/fsspec/dropbox.py deleted file mode 100644 index 247643016..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/dropbox.py +++ /dev/null @@ -1,51 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxWriteConfig, - SimpleDropboxConfig, -) - -CMD_NAME = "dropbox" - - -@dataclass -class DropboxCliConfig(SimpleDropboxConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Dropbox access token.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=DropboxCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=DropboxCliConfig, - write_config=DropboxWriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/fsspec.py b/unstructured/ingest/cli/cmds/fsspec/fsspec.py deleted file mode 100644 index e2d50a278..000000000 --- 
a/unstructured/ingest/cli/cmds/fsspec/fsspec.py +++ /dev/null @@ -1,15 +0,0 @@ -from unstructured.ingest.cli.base.src import BaseSrcCmd - -CMD_NAME = "fsspec" - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd(cmd_name=CMD_NAME, is_fsspec=True) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd(cmd_name=CMD_NAME, is_fsspec=True) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/gcs.py b/unstructured/ingest/cli/cmds/fsspec/gcs.py deleted file mode 100644 index 4664694a7..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/gcs.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - FileOrJson, -) -from unstructured.ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig - -CMD_NAME = "gcs" - - -@dataclass -class GcsCliConfig(SimpleGcsConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - help_string = """ - Options: - - ``None``, GCSFS will attempt to guess your credentials in the - following order: gcloud CLI default, gcsfs cached token, google compute - metadata service, anonymous. - - ``'google_default'``, your default gcloud credentials will be used, - which are typically established by doing ``gcloud login`` in a terminal. - - ``'cache'``, credentials from previously successful gcsfs - authentication will be used (use this after "browser" auth succeeded) - - ``'anon'``, no authentication is performed, and you can only - access data which is accessible to allUsers (in this case, the project and - access level parameters are meaningless) - - ``'browser'``, you get an access code with which you can - authenticate via a specially provided URL - - if ``'cloud'``, we assume we are running within google compute - or google container engine, and query the internal metadata directly for - a token. - - you may supply a token generated by the - [gcloud](https://cloud.google.com/sdk/docs/) - utility; this is either a python dictionary or the name of a file - containing the JSON returned by logging in with the gcloud CLI tool. 
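The `FileOrJson` type that the GCS and Google Drive options rely on appears, from its help texts, to accept either a path to a JSON credentials file or an inline JSON string, with `allow_raw_str=True` additionally letting plain token strings such as `"google_default"` or `"anon"` pass through. A hedged sketch of that inferred behavior:

```python
import json
import os

import click


class FileOrJson(click.ParamType):
    """Accept either a path to a JSON file or an inline JSON string.

    Behavior inferred from the --service-account-key help texts; the removed
    implementation may differ, e.g. in how a raw non-JSON string is treated.
    """

    name = "file-or-json"

    def __init__(self, allow_raw_str: bool = False):
        self.allow_raw_str = allow_raw_str

    def convert(self, value, param, ctx):
        if isinstance(value, str) and os.path.isfile(value):
            with open(value) as f:
                return json.load(f)
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            if self.allow_raw_str:
                return value  # e.g. a token keyword such as "google_default"
            self.fail(f"{value!r} is neither an existing file nor valid JSON", param, ctx)
```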
- """ - options = [ - click.Option( - ["--service-account-key"], - default=None, - type=FileOrJson(allow_raw_str=True), - help=help_string, - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=GcsCliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=GcsCliConfig, - write_config=GcsWriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/s3.py b/unstructured/ingest/cli/cmds/fsspec/s3.py deleted file mode 100644 index a185fa2e1..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/s3.py +++ /dev/null @@ -1,74 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config - -CMD_NAME = "s3" - - -@dataclass -class S3CliConfig(SimpleS3Config, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - help="Connect to s3 without local AWS credentials.", - ), - click.Option( - ["--endpoint-url"], - type=str, - default=None, - help="Use this endpoint_url, if specified. Needed for " - "connecting to non-AWS S3 buckets.", - ), - click.Option( - ["--key"], - type=str, - default=None, - help="If not anonymous, use this access key ID, if specified. Takes precedence " - "over `aws_access_key_id` in client_kwargs.", - ), - click.Option( - ["--secret"], - type=str, - default=None, - help="If not anonymous, use this secret access key, if specified.", - ), - click.Option( - ["--token"], - type=str, - default=None, - help="If not anonymous, use this security token, if specified.", - ), - ] - return options - - -def get_base_src_cmd(): - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=S3CliConfig, - is_fsspec=True, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=S3CliConfig, - write_config=S3WriteConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/fsspec/sftp.py b/unstructured/ingest/cli/cmds/fsspec/sftp.py deleted file mode 100644 index 01f7c615a..000000000 --- a/unstructured/ingest/cli/cmds/fsspec/sftp.py +++ /dev/null @@ -1,58 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.fsspec.sftp import SimpleSftpConfig - -CMD_NAME = "sftp" - - -@dataclass -class SftpCliConfig(SimpleSftpConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Username for sftp connection", - ), - click.Option( - ["--password"], - required=True, - type=str, - help="Password for sftp connection", - ), - click.Option( - ["--look-for-keys"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to search for private key files in ~/.ssh/", - ), - click.Option( - ["--allow-agent"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to connect 
to the SSH agent.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=SftpCliConfig, - is_fsspec=True, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/github.py b/unstructured/ingest/cli/cmds/github.py deleted file mode 100644 index bb3f1b7f0..000000000 --- a/unstructured/ingest/cli/cmds/github.py +++ /dev/null @@ -1,54 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.github import SimpleGitHubConfig - - -@dataclass -class GithubCliConfig(SimpleGitHubConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--url"], - required=True, - type=str, - help="URL to GitHub repository, e.g. " - '"https://github.com/Unstructured-IO/unstructured", or ' - 'a repository owner/name pair, e.g. "Unstructured-IO/unstructured"', - ), - click.Option( - ["--git-access-token"], - default=None, - help="A GitHub or GitLab access token, " - "see https://docs.github.com/en/authentication or " - "https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens", - ), - click.Option( - ["--git-branch"], - default=None, - type=str, - help="The branch to fetch files from. If not given," - " the default repository branch is used.", - ), - click.Option( - ["--git-file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which " - "types of files are accepted, e.g. '*.html,*.txt'", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="github", - cli_config=GithubCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/gitlab.py b/unstructured/ingest/cli/cmds/gitlab.py deleted file mode 100644 index 5f01c4201..000000000 --- a/unstructured/ingest/cli/cmds/gitlab.py +++ /dev/null @@ -1,54 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.gitlab import SimpleGitlabConfig - - -@dataclass -class GitlabCliConfig(SimpleGitlabConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--url"], - required=True, - type=str, - help="URL to GitLab repository, e.g. " - '"https://gitlab.com/gitlab-org/gitlab", or ' - 'a repository path, e.g. "gitlab-org/gitlab"', - ), - click.Option( - ["--git-access-token"], - default=None, - help="A GitHub or GitLab access token, " - "see https://docs.github.com/en/authentication or " - "https://docs.gitlab.com/ee/api/rest/index.html#personalprojectgroup-access-tokens", - ), - click.Option( - ["--git-branch"], - default=None, - type=str, - help="The branch to fetch files from. If not given," - " the default repository branch is used.", - ), - click.Option( - ["--git-file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "files are accepted, e.g. 
'*.html,*.txt'", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="gitlab", - cli_config=GitlabCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/google_drive.py b/unstructured/ingest/cli/cmds/google_drive.py deleted file mode 100644 index 6fc9b1930..000000000 --- a/unstructured/ingest/cli/cmds/google_drive.py +++ /dev/null @@ -1,49 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - FileOrJson, -) -from unstructured.ingest.connector.google_drive import SimpleGoogleDriveConfig - - -@dataclass -class GoogleDriveCliConfig(SimpleGoogleDriveConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--drive-id"], - required=True, - type=str, - help="Google Drive File or Folder ID.", - ), - click.Option( - ["--service-account-key"], - required=True, - type=FileOrJson(), - help="Either the file path of the credentials file to use or a json string of " - "those values to use for authentication", - ), - click.Option( - ["--extension"], - default=None, - type=str, - help="Filters the files to be processed based on extension e.g. .jpg, .docx, etc.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="google-drive", - cli_config=GoogleDriveCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/hubspot.py b/unstructured/ingest/cli/cmds/hubspot.py deleted file mode 100644 index 219973cb7..000000000 --- a/unstructured/ingest/cli/cmds/hubspot.py +++ /dev/null @@ -1,70 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliMixin, DelimitedString, Dict -from unstructured.ingest.connector.hubspot import HubSpotObjectTypes, SimpleHubSpotConfig - -OBJECT_TYPES = {t.value for t in HubSpotObjectTypes} - - -def validate_custom_property(ctx, param, value) -> t.Dict[str, t.List[str]]: - if not value: - return value - for k in value: - if k not in OBJECT_TYPES: - raise ValueError(f"Invalid object type: {k}, must be one of {OBJECT_TYPES}") - if not isinstance(value[k], list): - raise ValueError(f"Invalid type: {type(value[k])}, must be a Python list.") - return value - - -@dataclass -class HubSpotCliConfig(SimpleHubSpotConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-token"], - required=True, - type=str, - help="Access token to perform operations on Hubspot. \ - Check \ - https://developers.hubspot.com/docs/api/private-apps/ \ - for more info", - ), - click.Option( - ["--object-types"], - default=None, - required=False, - type=DelimitedString(choices=OBJECT_TYPES), - is_flag=False, - help=f"Object to include in the process.\ - Must be a subset of {','.join(OBJECT_TYPES)}.\ - If the argument is omitted all objects listed will be processed.", - ), - click.Option( - ["--custom-properties"], - default=None, - required=False, - type=Dict(), - is_flag=False, - callback=validate_custom_property, - help="Custom property to process information from.\ - It should be a json-like string in the form\ - :[, ..., ]\ - Must be internal name of the variable. 
If the property is missing, \ - it will be omitted.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="hubspot", - cli_config=HubSpotCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/jira.py b/unstructured/ingest/cli/cmds/jira.py deleted file mode 100644 index 74b2d5356..000000000 --- a/unstructured/ingest/cli/cmds/jira.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.jira import SimpleJiraConfig - - -@dataclass -class JiraCliConfig(SimpleJiraConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-token"], - required=True, - type=str, - help="API Token to authenticate into Jira (into Atlassian). \ - Check \ - https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/ \ - for more info.", - ), - click.Option( - ["--url"], - required=True, - type=str, - help="URL to Atlassian (Jira) Cloud, e.g. " - '"unstructured-jira-connector-test.atlassian.net"', - ), - click.Option( - ["--user-email"], - required=True, - type=str, - help="Email to authenticate into Atlassian (Jira) Cloud.", - ), - click.Option( - ["--projects"], - default=None, - type=DelimitedString(), - help="Comma-delimited Project ids or keys. Use Jira UI or the " - "API to find or obtain keys. Alternatively, use API to obtain ids.", - ), - click.Option( - ["--boards"], - default=None, - type=DelimitedString(), - help="Comma-delimited Board ids. Check board URL, or use the " - "API to find the board ids.", - ), - click.Option( - ["--issues"], - default=None, - type=DelimitedString(), - help="Comma-delimited Issue ids or keys. Use Jira UI or the API to " - "find or obtain keys. 
Alternatively, use API to obtain ids.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="jira", - cli_config=JiraCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/kafka.py b/unstructured/ingest/cli/cmds/kafka.py deleted file mode 100644 index afbad4888..000000000 --- a/unstructured/ingest/cli/cmds/kafka.py +++ /dev/null @@ -1,102 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig - -CMD_NAME = "kafka" - - -@dataclass -class KafkaCliConfig(SimpleKafkaConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--bootstrap-server"], required=True, type=str, help="Broker server hostname" - ), - click.Option( - ["--port"], - required=True, - type=str, - help="The bootstrap port", - ), - click.Option( - ["--topic"], - required=True, - type=str, - help="The topic to write into.", - ), - click.Option( - ["--kafka-api-key"], - required=False, - type=str, - help="The Kafka API key", - ), - click.Option( - ["--secret"], - required=False, - type=str, - help="The Kafka API secret", - ), - click.Option( - ["--num-messages-to-consume"], - required=False, - type=int, - default=1, - help="The number of messages to consume before unblocking the consumer", - ), - click.Option( - ["--timeout"], - required=False, - type=float, - default=1.0, - help="Maximum time to block waiting for messages (in seconds)", - ), - click.Option( - ["--confluent"], - required=False, - type=bool, - default=True, - help="Whether this Kafka instance is from Confluent", - ), - ] - return options - - -@dataclass -class KafkaCliWriteConfig(KafkaWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=4, - type=int, - help="Number of records per batch", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=KafkaCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=KafkaCliConfig, - additional_cli_options=[KafkaCliWriteConfig], - write_config=KafkaWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/local.py b/unstructured/ingest/cli/cmds/local.py deleted file mode 100644 index ff70c44ca..000000000 --- a/unstructured/ingest/cli/cmds/local.py +++ /dev/null @@ -1,43 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.local import SimpleLocalConfig - - -@dataclass -class LocalCliConfig(SimpleLocalConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--input-path"], - required=True, - type=click.Path(file_okay=True, dir_okay=True, exists=True), - help="Path to the location in the local file system that will be processed.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are 
accepted, e.g. '*.html,*.txt'", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="local", - cli_config=LocalCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/mongodb.py b/unstructured/ingest/cli/cmds/mongodb.py deleted file mode 100644 index 6fbb5c365..000000000 --- a/unstructured/ingest/cli/cmds/mongodb.py +++ /dev/null @@ -1,72 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.mongodb import SimpleMongoDBConfig -from unstructured.ingest.interfaces import WriteConfig - -CMD_NAME = "mongodb" - - -@dataclass -class MongoDBCliConfig(SimpleMongoDBConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--uri"], - help="URI to use when connecting", - ), - click.Option( - ["--host"], - type=DelimitedString(), - help="hostname or IP address or Unix domain socket path of a single mongod or " - "mongos instance to connect to, or a list of hostnames", - ), - click.Option(["--port"], type=int, default=27017), - click.Option( - ["--database"], type=str, required=True, help="database name to connect to" - ), - click.Option( - ["--collection"], required=True, type=str, help="collection name to connect to" - ), - ] - return options - - -@dataclass -class MongoDBReadConfig(SimpleMongoDBConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name=CMD_NAME, - cli_config=MongoDBCliConfig, - additional_cli_options=[MongoDBReadConfig], - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=MongoDBCliConfig, - write_config=WriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/notion.py b/unstructured/ingest/cli/cmds/notion.py deleted file mode 100644 index 02a9a30ed..000000000 --- a/unstructured/ingest/cli/cmds/notion.py +++ /dev/null @@ -1,48 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.notion.connector import SimpleNotionConfig - - -@dataclass -class NotionCliConfig(SimpleNotionConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--notion-api-key"], - required=True, - type=str, - help="API key for the Notion API", - ), - click.Option( - ["--page-ids"], - default=None, - type=DelimitedString(), - help="Notion page IDs to pull text from", - ), - click.Option( - ["--database-ids"], - default=None, - type=DelimitedString(), - help="Notion database IDs to pull text from", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="notion", - cli_config=NotionCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/onedrive.py 
b/unstructured/ingest/cli/cmds/onedrive.py deleted file mode 100644 index 5bf671d9f..000000000 --- a/unstructured/ingest/cli/cmds/onedrive.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, -) -from unstructured.ingest.connector.onedrive import SimpleOneDriveConfig - - -@dataclass -class OnedriveCliConfig(SimpleOneDriveConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="Microsoft app client ID", - ), - click.Option( - ["--client-cred"], - required=True, - type=str, - help="Microsoft App client secret", - ), - click.Option( - ["--user-pname"], - required=True, - type=str, - help="User principal name, usually is your Azure AD email.", - ), - click.Option( - ["--tenant"], - default="common", - type=str, - help="ID or domain name associated with your Azure AD instance", - ), - click.Option( - ["--path"], - default=None, - type=str, - help="Folder to start parsing files from.", - ), - click.Option( - ["--authority-url"], - default="https://login.microsoftonline.com", - type=str, - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="onedrive", - cli_config=OnedriveCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/opensearch.py b/unstructured/ingest/cli/cmds/opensearch.py deleted file mode 100644 index 0f135de15..000000000 --- a/unstructured/ingest/cli/cmds/opensearch.py +++ /dev/null @@ -1,117 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.cmds.elasticsearch import ElasticsearchCliWriteConfig -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.opensearch import SimpleOpenSearchConfig - -CMD_NAME = "opensearch" - - -@dataclass -class OpenSearchCliConfig(SimpleOpenSearchConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the OpenSearch index to pull data from, or upload data to.", - ), - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the OpenSearch hosts to connect to, e.g. 
"http://localhost:9200"', - ), - click.Option( - ["--fields"], - type=DelimitedString(), - default=[], - help="If provided, will limit the fields returned by OpenSearch " - "to this comma-delimited list", - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth", - ), - click.Option( - ["--use-ssl"], - type=bool, - default=False, - is_flag=True, - help="use ssl for the connection", - ), - click.Option( - ["--verify-certs"], - type=bool, - default=False, - is_flag=True, - help="whether to verify SSL certificates", - ), - click.Option( - ["--ssl-show-warn"], - type=bool, - default=False, - is_flag=True, - help="show warning when verify certs is disabled", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - help="path to CA bundle", - ), - click.Option( - ["--client-cert"], - type=click.Path(), - default=None, - help="path to the file containing the private key and the certificate," - " or cert only if using client_key", - ), - click.Option( - ["--client-key"], - type=click.Path(), - default=None, - help="path to the file containing the private key" - " if using separate cert and key files", - ), - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="opensearch", - cli_config=OpenSearchCliConfig, - ) - return cmd_cls - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="opensearch", - cli_config=OpenSearchCliConfig, - additional_cli_options=[ElasticsearchCliWriteConfig], - addition_configs={ - "connector_config": SimpleOpenSearchConfig, - "write_config": ElasticsearchCliWriteConfig, - }, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/outlook.py b/unstructured/ingest/cli/cmds/outlook.py deleted file mode 100644 index 7b4e66968..000000000 --- a/unstructured/ingest/cli/cmds/outlook.py +++ /dev/null @@ -1,67 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.outlook import SimpleOutlookConfig - - -@dataclass -class OutlookCliConfig(SimpleOutlookConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="Microsoft app client ID", - ), - click.Option( - ["--user-email"], - required=True, - type=str, - help="Outlook email to download messages from.", - ), - click.Option( - ["--tenant"], - default="common", - help="ID or domain name associated with your Azure AD instance", - ), - click.Option( - ["--outlook-folders"], - default=None, - type=DelimitedString(), - help="Folders to download email messages from. " - "Do not specify subfolders. 
Use quotes if spaces in folder names.", - ), - click.Option( - ["--client-cred"], - default=None, - type=str, - help="Microsoft App client secret", - ), - click.Option( - ["--authority-url"], - default="https://login.microsoftonline.com", - type=str, - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="outlook", - cli_config=OutlookCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/pinecone.py b/unstructured/ingest/cli/cmds/pinecone.py deleted file mode 100644 index 91d476669..000000000 --- a/unstructured/ingest/cli/cmds/pinecone.py +++ /dev/null @@ -1,71 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig - - -@dataclass -class PineconeCliConfig(SimplePineconeConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--api-key"], - required=True, - type=str, - help="API key used for authenticating to a Pinecone instance.", - envvar="PINECONE_API_KEY", - show_envvar=True, - ), - click.Option( - ["--index-name"], - required=True, - type=str, - help="The name of the pinecone index to connect to.", - ), - click.Option( - ["--environment"], - required=True, - type=str, - help="The environment where the index lives. Eg. 'gcp-starter' or 'us-east1-gcp'", - ), - ] - return options - - -@dataclass -class PineconeCliWriteConfig(PineconeWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=50, - type=int, - help="Number of records per batch", - ), - click.Option( - ["--num-processes"], - default=2, - type=int, - help="Number of parallel processes with which to upload elements", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="pinecone", - cli_config=PineconeCliConfig, - additional_cli_options=[PineconeCliWriteConfig], - write_config=PineconeWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/qdrant.py b/unstructured/ingest/cli/cmds/qdrant.py deleted file mode 100644 index 1a0847614..000000000 --- a/unstructured/ingest/cli/cmds/qdrant.py +++ /dev/null @@ -1,124 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig - - -@dataclass -class QdrantCliConfig(SimpleQdrantConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--collection-name"], - required=True, - type=str, - help="The name of the Qdrant collection to use.", - ), - click.Option( - ["--location"], - type=str, - help="The location of the Qdrant cluster.", - ), - click.Option( - ["--url"], - type=str, - help="The location of the Qdrant cluster.", - ), - click.Option( - ["--port"], - type=int, - default=6333, - help="Port of the REST API interface. Default: 6333.", - ), - click.Option( - ["--grpc-port"], - type=int, - default=6334, - help="Port of the gRPC interface. 
Default: 6334.", - ), - click.Option( - ["--prefer-grpc"], - type=bool, - is_flag=True, - help="Whether to use gRPC interface whenever possible in methods. Default: False.", - ), - click.Option( - ["--https"], - type=bool, - is_flag=True, - help="Whether to use HTTPS(SSL) protocol. Default: False.", - ), - click.Option( - ["--prefix"], - type=str, - help="Prefix to add to the REST API endpoints.", - ), - click.Option( - ["--timeout"], - type=int, - help="Timeout for operations. Default: 5.0 seconds for REST, unlimited for gRPC.", - ), - click.Option( - ["--host"], - type=str, - help="Host name of the Qdrant service.", - ), - click.Option( - ["--path"], - type=str, - help="Persistence path for QdrantLocal.", - ), - click.Option( - ["--force-disable-check-same-thread"], - type=bool, - is_flag=True, - help="Whether to force disable check same thread for QdrantLocal.", - ), - click.Option( - ["--api-key"], - type=str, - help="API key for authentication in Qdrant Cloud. Default: None.", - envvar="QDRANT_API_KEY", - show_envvar=True, - ), - ] - return options - - -@dataclass -class QdrantCliWriteConfig(QdrantWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=50, - type=int, - help="Number of points to upload per batch", - ), - click.Option( - ["--num-processes"], - default=2, - type=int, - help="Number of parallel processes with which to upload", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="qdrant", - cli_config=QdrantCliConfig, - additional_cli_options=[QdrantCliWriteConfig], - write_config=QdrantWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/reddit.py b/unstructured/ingest/cli/cmds/reddit.py deleted file mode 100644 index 067b74250..000000000 --- a/unstructured/ingest/cli/cmds/reddit.py +++ /dev/null @@ -1,67 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.reddit import SimpleRedditConfig - - -@dataclass -class RedditCliConfig(SimpleRedditConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="The client ID, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" # noqa: E501 - " for more information.", - ), - click.Option( - ["--client-secret"], - required=True, - type=str, - help="The client secret, see " - "https://praw.readthedocs.io/en/stable/getting_started/quick_start.html#prerequisites" # noqa: E501 - " for more information.", - ), - click.Option( - ["--subreddit-name"], - required=True, - type=str, - help='The name of a subreddit, without the "r/", e.g. "machinelearning"', - ), - click.Option( - ["--search-query"], - default=None, - type=str, - help="If set, return posts using this query. 
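The destination write configs in this part of the diff (pinecone, qdrant, and elasticsearch earlier) all pair `--batch-size` with `--num-processes`. The upload pattern those two knobs describe is, in essence, chunk-then-fan-out; a sketch with a placeholder `upload_batch` standing in for the real client call:

```python
import multiprocessing
import typing as t


def chunk(items: t.Sequence[dict], batch_size: int) -> t.Iterator[t.Sequence[dict]]:
    # Yield successive slices of at most batch_size items.
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]


def upload_batch(batch: t.Sequence[dict]) -> int:
    # Placeholder for a real client call, e.g. an index upsert; returns count written.
    return len(batch)


def write_elements(elements: t.List[dict], batch_size: int = 50, num_processes: int = 2) -> int:
    batches = list(chunk(elements, batch_size))
    with multiprocessing.Pool(processes=num_processes) as pool:
        return sum(pool.map(upload_batch, batches))


if __name__ == "__main__":
    # 125 elements -> batches of 50/50/25, uploaded across 2 worker processes.
    print(write_elements([{"id": i} for i in range(125)], batch_size=50, num_processes=2))
```

Larger batches amortize per-request overhead while more processes overlap network latency; the defaults in the deleted configs (50 and 2) reflect that trade-off.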
Otherwise, use hot posts.", - ), - click.Option( - ["--num-posts"], - required=True, - type=click.IntRange(0), - help="If set, limits the number of posts to pull in.", - ), - click.Option( - ["--user-agent"], - required=True, - type=str, - help="user agent request header to use when calling Reddit API", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="reddit", - cli_config=RedditCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/salesforce.py b/unstructured/ingest/cli/cmds/salesforce.py deleted file mode 100644 index a6d7119a1..000000000 --- a/unstructured/ingest/cli/cmds/salesforce.py +++ /dev/null @@ -1,58 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, - DelimitedString, -) -from unstructured.ingest.connector.salesforce import SimpleSalesforceConfig - - -@dataclass -class SalesforceCliConfig(SimpleSalesforceConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - possible_categories = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Salesforce username usually looks like an email.", - ), - click.Option( - ["--consumer-key"], - required=True, - type=str, - help="For the Salesforce JWT auth. Found in Consumer Details.", - ), - click.Option( - ["--private-key"], - required=True, - type=str, - help="Path to the private key or its contents for the Salesforce JWT auth. " - "Key file is usually named server.key.", - ), - click.Option( - ["--categories"], - default=None, - required=True, - type=DelimitedString(choices=possible_categories), - help="Comma-delimited salesforce categories to download. " - "Currently only {}.".format(", ".join(possible_categories)), - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="salesforce", - cli_config=SalesforceCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/sharepoint.py b/unstructured/ingest/cli/cmds/sharepoint.py deleted file mode 100644 index 5c6185eef..000000000 --- a/unstructured/ingest/cli/cmds/sharepoint.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - CliRecursiveConfig, -) -from unstructured.ingest.connector.sharepoint import SimpleSharepointConfig - - -@dataclass -class SharepointCliConfig(SimpleSharepointConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--client-id"], - default=None, - type=str, - help="Sharepoint app client ID", - ), - click.Option( - ["--client-cred"], - default=None, - type=str, - help="Sharepoint app secret", - ), - click.Option( - ["--site"], - default=None, - type=str, - help="Sharepoint site url. Process either base url e.g \ - https://[tenant].sharepoint.com or relative sites \ - https://[tenant].sharepoint.com/sites/. 
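The Reddit options map one-to-one onto the PRAW client that the help text links to; as a hedged sketch (placeholder credentials, not the removed connector implementation):

```python
import praw

reddit = praw.Reddit(
    client_id="...",                        # --client-id
    client_secret="...",                    # --client-secret
    user_agent="unstructured-ingest-demo",  # --user-agent
)
subreddit = reddit.subreddit("machinelearning")  # --subreddit-name

search_query = None  # --search-query
num_posts = 10       # --num-posts
# As the --search-query help says: use the query if set, hot posts otherwise.
posts = (
    subreddit.search(search_query, limit=num_posts)
    if search_query
    else subreddit.hot(limit=num_posts)
)
for post in posts:
    print(post.title)
```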
\ - To process all sites within the tenant pass a site url as \ - https://[tenant]-admin.sharepoint.com.\ - This requires the app to be registered at a tenant level", - ), - click.Option( - ["--path"], - default="Shared Documents", - type=str, - help="Path from which to start parsing files. If the connector is to \ - process all sites within the tenant this filter will be applied to \ - all sites document libraries. Default 'Shared Documents'", - ), - click.Option( - ["--files-only"], - is_flag=True, - default=False, - help="Process only files.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="sharepoint", - cli_config=SharepointCliConfig, - additional_cli_options=[CliRecursiveConfig], - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/slack.py b/unstructured/ingest/cli/cmds/slack.py deleted file mode 100644 index 7112849e1..000000000 --- a/unstructured/ingest/cli/cmds/slack.py +++ /dev/null @@ -1,56 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, - DelimitedString, -) -from unstructured.ingest.connector.slack import SimpleSlackConfig - - -@dataclass -class SlackCliConfig(SimpleSlackConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Bot token used to access Slack API, must have channels:history " - "scope for the bot user", - ), - click.Option( - ["--channels"], - required=True, - type=DelimitedString(), - help="Comma-delimited list of Slack channel IDs to pull messages from, " - "can be a public or private channel", - ), - click.Option( - ["--start-date"], - default=None, - type=str, - help="Start date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or " - "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz", - ), - click.Option( - ["--end-date"], - default=None, - type=str, - help="End date/time in formats YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or " - "YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SStz", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="slack", - cli_config=SlackCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/sql.py b/unstructured/ingest/cli/cmds/sql.py deleted file mode 100644 index 7b4800e55..000000000 --- a/unstructured/ingest/cli/cmds/sql.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.sql import SimpleSqlConfig -from unstructured.ingest.interfaces import WriteConfig - -SQL_DRIVERS = {"postgresql", "sqlite"} - - -@dataclass -class SqlCliConfig(SimpleSqlConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--db-type"], - required=True, - type=click.Choice(SQL_DRIVERS), - help="Type of the database backend", - ), - click.Option( - ["--username"], - default=None, - type=str, - help="DB username", - ), - click.Option( - ["--password"], - default=None, - type=str, - help="DB password", - ), - click.Option( - ["--host"], - default=None, - type=str, - help="DB host", - ), - click.Option( - ["--port"], - default=None, - type=int, - help="DB host connection port", - ), - click.Option( - ["--database"], - default=None, - type=str, - help="Database name. 
For sqlite databases, this is the path to the .db file.", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="sql", - cli_config=SqlCliConfig, - write_config=WriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/vectara.py b/unstructured/ingest/cli/cmds/vectara.py deleted file mode 100644 index 0c623362b..000000000 --- a/unstructured/ingest/cli/cmds/vectara.py +++ /dev/null @@ -1,66 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig -from unstructured.ingest.connector.vectara import SimpleVectaraConfig, WriteConfig - - -@dataclass -class VectaraCliWriteConfig(SimpleVectaraConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--customer-id"], - required=True, - type=str, - help="The Vectara customer-id.", - envvar="VECTARA_CUSTOMER_ID", - show_envvar=True, - ), - click.Option( - ["--oauth-client-id"], - required=True, - type=str, - help="Vectara OAuth2 client ID.", - envvar="VECTARA_OAUTH_CLIENT_ID", - show_envvar=True, - ), - click.Option( - ["--oauth-secret"], - required=True, - type=str, - help="Vectara OAuth2 secret.", - envvar="VECTARA_OAUTH_SECRET", - show_envvar=True, - ), - click.Option( - ["--corpus-name"], - required=False, - type=str, - default=None, - help="The Vectara corpus-name.", - ), - click.Option( - ["--token-url"], - required=False, - default="https://vectara-prod-{}.auth.us-west-2.amazoncognito.com/oauth2/token", - type=str, - help="The Vectara endpoint for token refresh. Needs curly brackets for customer_id", - ), - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name="vectara", - cli_config=VectaraCliWriteConfig, - additional_cli_options=[], - write_config=WriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/weaviate.py b/unstructured/ingest/cli/cmds/weaviate.py deleted file mode 100644 index 69107a9c2..000000000 --- a/unstructured/ingest/cli/cmds/weaviate.py +++ /dev/null @@ -1,98 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.interfaces import CliConfig, DelimitedString -from unstructured.ingest.connector.weaviate import SimpleWeaviateConfig, WeaviateWriteConfig - -CMD_NAME = "weaviate" - - -@dataclass -class WeaviateCliConfig(SimpleWeaviateConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--host-url"], - required=True, - help="Weaviate instance url", - ), - click.Option( - ["--class-name"], - default=None, - type=str, - help="Name of the class to push the records into, e.g: Pdf-elements", - ), - click.Option( - ["--access-token"], default=None, type=str, help="Used to create the bearer token." - ), - click.Option( - ["--refresh-token"], - default=None, - type=str, - help="Will tie this value to the bearer token. 
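For the two supported `--db-type` backends, the connection the removed SQL destination ultimately opens looks roughly like this (illustrative only; the choice of `psycopg2` as the PostgreSQL driver is an assumption):

```python
import sqlite3

db_type = "sqlite"        # --db-type, one of {"postgresql", "sqlite"}
database = "elements.db"  # --database: for sqlite, the path to the .db file

if db_type == "sqlite":
    conn = sqlite3.connect(database)
else:
    # postgresql branch; assumes a DB-API driver such as psycopg2 is installed
    import psycopg2

    conn = psycopg2.connect(
        user="...",        # --username
        password="...",    # --password
        host="localhost",  # --host
        port=5432,         # --port
        dbname=database,   # --database
    )
```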
If not provided, " - "the authentication will expire once the lifetime of the access token is up.", - ), - click.Option( - ["--api-key"], - default=None, - type=str, - ), - click.Option( - ["--client-secret"], - default=None, - type=str, - ), - click.Option( - ["--scope"], - default=None, - type=DelimitedString(), - ), - click.Option( - ["--username"], - default=None, - type=str, - ), - click.Option( - ["--password"], - default=None, - type=str, - ), - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - type=bool, - help="if set, all auth values will be ignored", - ), - ] - return options - - -@dataclass -class WeaviateCliWriteConfig(WeaviateWriteConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -def get_base_dest_cmd(): - from unstructured.ingest.cli.base.dest import BaseDestCmd - - cmd_cls = BaseDestCmd( - cmd_name=CMD_NAME, - cli_config=WeaviateCliConfig, - additional_cli_options=[WeaviateCliWriteConfig], - write_config=WeaviateWriteConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/cmds/wikipedia.py b/unstructured/ingest/cli/cmds/wikipedia.py deleted file mode 100644 index a25f5c44c..000000000 --- a/unstructured/ingest/cli/cmds/wikipedia.py +++ /dev/null @@ -1,40 +0,0 @@ -import typing as t -from dataclasses import dataclass - -import click - -from unstructured.ingest.cli.base.src import BaseSrcCmd -from unstructured.ingest.cli.interfaces import ( - CliConfig, -) -from unstructured.ingest.connector.wikipedia import SimpleWikipediaConfig - - -@dataclass -class WikipediaCliConfig(SimpleWikipediaConfig, CliConfig): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--page-title"], - required=True, - type=str, - help='Title of a Wikipedia page, e.g. "Open source software".', - ), - click.Option( - ["--auto-suggest"], - default=True, - is_flag=True, - help="Whether to automatically suggest a page if the exact page was not found." 
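The Weaviate options roughly parameterize a client like this (a sketch against the v3 `weaviate-client` API that was current for this connector; the v4 client renamed most of these calls):

```python
import weaviate

client = weaviate.Client(
    url="http://localhost:8080",                    # --host-url
    auth_client_secret=weaviate.AuthApiKey("..."),  # --api-key
)
client.batch.configure(batch_size=100)  # --batch-size
with client.batch as batch:
    # --class-name names the target class; "Pdf_elements" is a placeholder
    batch.add_data_object({"text": "hello"}, class_name="Pdf_elements")
```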
- " Set to False if the wrong Wikipedia page is fetched.", - ), - ] - return options - - -def get_base_src_cmd() -> BaseSrcCmd: - cmd_cls = BaseSrcCmd( - cmd_name="wikipedia", - cli_config=WikipediaCliConfig, - ) - return cmd_cls diff --git a/unstructured/ingest/cli/common.py b/unstructured/ingest/cli/common.py deleted file mode 100644 index 53dacafaf..000000000 --- a/unstructured/ingest/cli/common.py +++ /dev/null @@ -1,7 +0,0 @@ -import logging - -from unstructured.ingest.logger import ingest_log_streaming_init - - -def log_options(options: dict, verbose=False): - ingest_log_streaming_init(logging.DEBUG if verbose else logging.INFO) diff --git a/unstructured/ingest/cli/interfaces.py b/unstructured/ingest/cli/interfaces.py deleted file mode 100644 index 4703a1c47..000000000 --- a/unstructured/ingest/cli/interfaces.py +++ /dev/null @@ -1,656 +0,0 @@ -from __future__ import annotations - -import json -import os.path -import typing as t -from abc import abstractmethod -from dataclasses import fields -from gettext import gettext, ngettext -from pathlib import Path - -import click -from dataclasses_json.core import Json -from typing_extensions import Self - -from unstructured.chunking import CHUNK_MAX_CHARS_DEFAULT, CHUNK_MULTI_PAGE_DEFAULT -from unstructured.ingest.interfaces import ( - BaseConfig, - ChunkingConfig, - EmbeddingConfig, - FileStorageConfig, - PartitionConfig, - PermissionsConfig, - ProcessorConfig, - ReadConfig, - RetryStrategyConfig, -) - - -class Dict(click.ParamType): - name = "dict" - - def convert( - self, - value: t.Any, - param: t.Optional[click.Parameter] = None, - ctx: t.Optional[click.Context] = None, - ) -> t.Any: - try: - return json.loads(value) - except json.JSONDecodeError: - self.fail( - gettext( - "{value} is not a valid json value.", - ).format(value=value), - param, - ctx, - ) - - -class FileOrJson(click.ParamType): - name = "file-or-json" - - def __init__(self, allow_raw_str: bool = False): - self.allow_raw_str = allow_raw_str - - def convert( - self, - value: t.Any, - param: t.Optional[click.Parameter] = None, - ctx: t.Optional[click.Context] = None, - ) -> t.Any: - # check if valid file - full_path = os.path.abspath(os.path.expanduser(value)) - if os.path.isfile(full_path): - return str(Path(full_path).resolve()) - if isinstance(value, str): - try: - return json.loads(value) - except json.JSONDecodeError: - if self.allow_raw_str: - return value - self.fail( - gettext( - "{value} is not a valid json string nor an existing filepath.", - ).format(value=value), - param, - ctx, - ) - - -class DelimitedString(click.ParamType): - name = "delimited-string" - - def __init__(self, delimiter: str = ",", choices: t.Optional[t.List[str]] = None): - self.choices = choices if choices else [] - self.delimiter = delimiter - - def convert( - self, - value: t.Any, - param: t.Optional[click.Parameter] = None, - ctx: t.Optional[click.Context] = None, - ) -> t.Any: - # In case a list is provided as the default, will not break - if isinstance(value, list): - split = [str(v).strip() for v in value] - else: - split = [v.strip() for v in value.split(self.delimiter)] - if not self.choices: - return split - choices_str = ", ".join(map(repr, self.choices)) - for s in split: - if s not in self.choices: - self.fail( - ngettext( - "{value!r} is not {choice}.", - "{value!r} is not one of {choices}.", - len(self.choices), - ).format(value=s, choice=choices_str, choices=choices_str), - param, - ctx, - ) - return split - - -class CliMixin: - @staticmethod - @abstractmethod - def 
get_cli_options() -> t.List[click.Option]: - pass - - @classmethod - def add_cli_options(cls, cmd: click.Command) -> None: - options_to_add = cls.get_cli_options() - CliMixin.add_params(cmd, params=options_to_add) - - def add_params(cmd: click.Command, params: t.List[click.Parameter]): - existing_opts = [] - for param in cmd.params: - existing_opts.extend(param.opts) - - for param in params: - for opt in param.opts: - if opt in existing_opts: - raise ValueError(f"{opt} is already defined on the command {cmd.name}") - existing_opts.append(opt) - cmd.params.append(param) - - -class CliConfig(BaseConfig, CliMixin): - pass - - -class CliRetryStrategyConfig(RetryStrategyConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--max-retries"], - default=None, - type=int, - help="If provided, will use this max retry for " - "back off strategy if http calls fail", - ), - click.Option( - ["--max-retry-time"], - default=None, - type=float, - help="If provided, will attempt retries for this long as part " - "of back off strategy if http calls fail", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs): - """ - Return None if none of the fields are being populated - """ - if isinstance(kvs, dict): - field_names = {field.name for field in fields(cls) if field.name in kvs} - field_values = [kvs.get(n) for n in field_names if kvs.get(n)] - if not field_values: - return None - return super().from_dict(kvs=kvs, **kwargs) - - -class CliProcessorConfig(ProcessorConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--reprocess"], - is_flag=True, - default=False, - help="Reprocess a downloaded file even if the relevant structured " - "output .json file in output directory already exists.", - ), - click.Option( - ["--output-dir"], - default="structured-output", - help="Where to place structured output .json files.", - ), - click.Option( - ["--work-dir"], - type=str, - default=str( - (Path.home() / ".cache" / "unstructured" / "ingest" / "pipeline").resolve(), - ), - show_default=True, - help="Where to place working files when processing each step", - ), - click.Option( - ["--num-processes"], - default=2, - show_default=True, - help="Number of parallel processes with which to process docs", - ), - click.Option( - ["--raise-on-error"], - is_flag=True, - default=False, - help="Is set, will raise error if any doc in the pipeline fail. Otherwise will " - "log error and continue with other docs", - ), - click.Option(["-v", "--verbose"], is_flag=True, default=False), - ] - return options - - -class CliReadConfig(ReadConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - click.Option( - ["--re-download"], - is_flag=True, - default=False, - help="Re-download files even if they are already present in download dir.", - ), - click.Option( - ["--preserve-downloads"], - is_flag=True, - default=False, - help="Preserve downloaded files. Otherwise each file is removed " - "after being processed successfully.", - ), - click.Option( - ["--download-only"], - is_flag=True, - default=False, - help="Download any files that are not already present in either --download-dir or " - "the default download ~/.cache/... 
location in case --download-dir " - "is not specified and " - "skip processing them through unstructured.", - ), - click.Option( - ["--max-docs"], - default=None, - type=int, - help="If specified, process at most the specified number of documents.", - ), - ] - return options - - -class CliPartitionConfig(PartitionConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--pdf-infer-table-structure"], - is_flag=True, - default=False, - help="Partition will include the table's text_as_html " "in the response metadata.", - ), - click.Option( - ["--strategy"], - default="auto", - help="The method that will be used to process the documents. " - "Default: auto. Other strategies include `fast` and `hi_res`.", - ), - click.Option( - ["--ocr-languages"], - default=None, - type=DelimitedString(delimiter="+"), - help="A list of language packs to specify which languages to use for OCR, " - "separated by '+' e.g. 'eng+deu' to use the English and German language packs. " - "The appropriate Tesseract " - "language pack needs to be installed.", - ), - click.Option( - ["--encoding"], - default=None, - help="Text encoding to use when reading documents. By default the encoding is " - "detected automatically.", - ), - click.Option( - ["--skip-infer-table-types"], - type=DelimitedString(), - default=None, - help="Optional list of document types to skip table extraction on", - ), - click.Option( - ["--additional-partition-args"], - type=Dict(), - help="A json string representation of values to pass through to partition()", - ), - click.Option( - ["--fields-include"], - type=DelimitedString(), - default=["element_id", "text", "type", "metadata", "embeddings"], - help="Comma-delimited list. If set, include the specified top-level " - "fields in an element.", - ), - click.Option( - ["--flatten-metadata"], - is_flag=True, - default=False, - help="Results in flattened json elements. " - "Specifically, the metadata key values are brought to " - "the top-level of the element, and the `metadata` key itself is removed.", - ), - click.Option( - ["--metadata-include"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, include the specified metadata " - "fields if they exist and drop all other fields. ", - ), - click.Option( - ["--metadata-exclude"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, drop the specified metadata " - "fields if they exist.", - ), - click.Option( - ["--partition-by-api"], - is_flag=True, - default=False, - help="Use a remote API to partition the files." - " Otherwise, use the function from partition.auto", - ), - click.Option( - ["--partition-endpoint"], - default="https://api.unstructured.io/general/v0/general", - help="If partitioning via api, use the following host. 
" - "Default: https://api.unstructured.io/general/v0/general", - ), - click.Option( - ["--api-key"], - default=None, - help="API Key for partition endpoint.", - ), - click.Option( - ["--hi-res-model-name"], - default=None, - help="Model name for hi-res strategy.", - ), - ] - return options - - -class CliRecursiveConfig(CliConfig): - recursive: bool - - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -class CliFilesStorageConfig(FileStorageConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--remote-url"], - required=True, - help="Remote fsspec URL formatted as `protocol://dir/path`", - ), - click.Option( - ["--uncompress"], - type=bool, - default=False, - is_flag=True, - help="Uncompress any archived files. Currently supporting zip and tar " - "files based on file extension.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are accepted, e.g. '*.html,*.txt'", - ), - ] - return options - - -class CliEmbeddingConfig(EmbeddingConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - from unstructured.embed import EMBEDDING_PROVIDER_TO_CLASS_MAP - - options = [ - click.Option( - ["--embedding-provider"], - help="Type of the embedding class to be used. Can be one of: " - f"{list(EMBEDDING_PROVIDER_TO_CLASS_MAP)}", - type=click.Choice(list(EMBEDDING_PROVIDER_TO_CLASS_MAP)), - ), - click.Option( - ["--embedding-api-key"], - help="API key for the embedding model, for the case an API key is needed.", - type=str, - default=None, - ), - click.Option( - ["--embedding-model-name"], - help="Embedding model name, if needed. " - "Chooses a particular LLM between different options, to embed with it.", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-access-key-id"], - help="AWS access key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-secret-access-key"], - help="AWS secret key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-region"], - help="AWS region used for AWS-based embedders, such as bedrock", - type=str, - default="us-west-2", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs): - """ - Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. 
- This allows CLI arguments to be prepended with embedding_ during CLI invocation but - doesn't require that as part of the field names in this class - """ - if isinstance(kvs, dict): - new_kvs = { - k[len("embedding_") :]: v # noqa: E203 - for k, v in kvs.items() - if k.startswith("embedding_") - } - if len(new_kvs.keys()) == 0: - return None - if not new_kvs.get("provider"): - return None - return super().from_dict(new_kvs, **kwargs) - return super().from_dict(kvs, **kwargs) - - -class CliChunkingConfig(ChunkingConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--chunk-elements"], - is_flag=True, - default=False, - help="Deprecated, use --chunking-strategy instead.", - ), - click.Option( - ["--chunking-strategy"], - type=str, - help="The rule-set to use to form chunks. Omit to disable chunking.", - ), - click.Option( - ["--chunk-combine-text-under-n-chars"], - type=int, - help=( - "Combine consecutive chunks when the first does not exceed this length and" - " the second will fit without exceeding the hard-maximum length. Only" - " operative for 'by_title' chunking-strategy." - ), - ), - click.Option( - ["--chunk-include-orig-elements/--chunk-no-include-orig-elements"], - is_flag=True, - default=True, - help=( - "When chunking, add the original elements consolidated to form each chunk to" - " `.metadata.orig_elements` on that chunk." - ), - ), - click.Option( - ["--chunk-max-characters"], - type=int, - default=CHUNK_MAX_CHARS_DEFAULT, - show_default=True, - help=( - "Hard maximum chunk length. No chunk will exceed this length. An oversized" - " element will be divided by text-splitting to fit this window." - ), - ), - click.Option( - ["--chunk-multipage-sections/--chunk-no-multipage-sections"], - is_flag=True, - default=CHUNK_MULTI_PAGE_DEFAULT, - help=( - "Ignore page boundaries when chunking such that elements from two different" - " pages can appear in the same chunk. Only operative for 'by_title'" - " chunking-strategy." - ), - ), - click.Option( - ["--chunk-new-after-n-chars"], - type=int, - help=( - "Soft-maximum chunk length. Another element will not be added to a chunk of" - " this length even when it would fit without exceeding the hard-maximum" - " length." - ), - ), - click.Option( - ["--chunk-overlap"], - type=int, - default=0, - show_default=True, - help=( - "Prefix chunk text with last overlap=N characters of prior chunk. Only" - " applies to oversized chunks divided by text-splitting. To apply overlap to" - " non-oversized chunks use the --overlap-all option." - ), - ), - click.Option( - ["--chunk-overlap-all"], - is_flag=True, - default=False, - help=( - "Apply overlap to chunks formed from whole elements as well as those formed" - " by text-splitting oversized elements. Overlap length is take from --overlap" - " option value." - ), - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs: t.Any) -> t.Optional[Self]: - """Extension of dataclass from_dict() to avoid a naming conflict with other CLI params. 
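The `from_dict()` overrides in this file (`embedding_`, `chunk_`, `permissions_`) all rest on the same prefix-stripping dict comprehension; isolated, it is just (values here are placeholders):

```python
kvs = {"embedding_provider": "openai", "embedding_api_key": "...", "strategy": "auto"}

# Keep only the embedding_* keys and drop the prefix, exactly as the override does;
# unprefixed keys like "strategy" fall away.
new_kvs = {k[len("embedding_"):]: v for k, v in kvs.items() if k.startswith("embedding_")}
print(new_kvs)  # {'provider': 'openai', 'api_key': '...'}
```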
- - This allows CLI arguments to be prefixed with "chunking_" during CLI invocation but doesn't - require that as part of the field names in this class - """ - if not isinstance(kvs, dict): - return super().from_dict(kvs=kvs, **kwargs) - - options: t.Dict[str, t.Any] = kvs.copy() - chunk_elements = options.pop("chunk_elements", None) - chunking_strategy = options.pop("chunking_strategy", None) - # -- when neither are specified, chunking is not requested -- - if not chunk_elements and not chunking_strategy: - return None - - def iter_kv_pairs() -> t.Iterator[t.Tuple[str, t.Any]]: - # -- newer `chunking_strategy` option takes precedence over legacy `chunk_elements` -- - if chunking_strategy: - yield "chunking_strategy", chunking_strategy - # -- but legacy case is still supported, equivalent to `chunking_strategy="by_title" -- - elif chunk_elements: - yield "chunking_strategy", "by_title" - - yield from ( - (key[len("chunk_") :], value) - for key, value in options.items() - if key.startswith("chunk_") - ) - - new_kvs = dict(iter_kv_pairs()) - return None if len(new_kvs) == 0 else super().from_dict(kvs=new_kvs, **kwargs) - - -class CliPermissionsConfig(PermissionsConfig, CliMixin): - @staticmethod - def get_cli_options() -> t.List[click.Option]: - options = [ - click.Option( - ["--permissions-application-id"], - type=str, - help="Microsoft Graph API application id", - ), - click.Option( - ["--permissions-client-cred"], - type=str, - help="Microsoft Graph API application credentials", - ), - click.Option( - ["--permissions-tenant"], - type=str, - help="e.g https://contoso.onmicrosoft.com to get permissions data within tenant.", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs): - """ - Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. - This allows CLI arguments to be prepended with permissions_ during CLI invocation but - doesn't require that as part of the field names in this class. It also checks if the - CLI params are provided as intended. 
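The precedence rule buried in `iter_kv_pairs()` above is easy to miss; stated as a tiny function with its three cases:

```python
from typing import Optional


def resolve_strategy(chunk_elements: bool, chunking_strategy: Optional[str]) -> Optional[str]:
    """Newer --chunking-strategy wins; legacy --chunk-elements implies "by_title"."""
    if chunking_strategy:
        return chunking_strategy
    return "by_title" if chunk_elements else None


assert resolve_strategy(False, "basic") == "basic"
assert resolve_strategy(True, "basic") == "basic"  # explicit strategy takes precedence
assert resolve_strategy(True, None) == "by_title"  # legacy flag still honoured
assert resolve_strategy(False, None) is None       # no chunking requested
```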
- """ - - if isinstance(kvs, dict): - permissions_application_id = kvs.get("permissions_application_id") - permissions_client_cred = kvs.get("permissions_client_cred") - permissions_tenant = kvs.get("permissions_tenant") - permission_values = [ - permissions_application_id, - permissions_client_cred, - permissions_tenant, - ] - if any(permission_values) and not all(permission_values): - raise ValueError( - "Please provide either none or all of the following optional values:\n" - "--permissions-application-id\n" - "--permissions-client-cred\n" - "--permissions-tenant", - ) - - new_kvs = { - k[len("permissions_") :]: v # noqa: E203 - for k, v in kvs.items() - if k.startswith("permissions_") - } - if len(new_kvs.keys()) == 0: - return None - return super().from_dict(kvs=new_kvs, **kwargs) - return super().from_dict(kvs=kvs, **kwargs) diff --git a/unstructured/ingest/cli/utils.py b/unstructured/ingest/cli/utils.py deleted file mode 100644 index 701355f26..000000000 --- a/unstructured/ingest/cli/utils.py +++ /dev/null @@ -1,205 +0,0 @@ -import typing as t -from dataclasses import fields, is_dataclass -from gettext import gettext as _ - -import click - -from unstructured.ingest.cli.interfaces import ( - CliChunkingConfig, - CliConfig, - CliEmbeddingConfig, - CliPartitionConfig, - CliPermissionsConfig, - CliProcessorConfig, - CliReadConfig, - CliRetryStrategyConfig, -) -from unstructured.ingest.interfaces import BaseConfig -from unstructured.ingest.logger import logger - - -def conform_click_options(options: dict): - # Click sets all multiple fields as tuple, this needs to be updated to list - for k, v in options.items(): - if isinstance(v, tuple): - options[k] = list(v) - - -def extract_config(flat_data: dict, config: t.Type[BaseConfig]) -> BaseConfig: - """ - To be able to extract a nested dataclass from a flat dictionary (as in one coming - from a click-based options input), the config class is dynamically looked through for - nested dataclass fields and new nested dictionaries are created to conform to the - shape the overall class expects whn parsing from a dict. During the process, this will create - copies of the original dictionary to avoid pruning fields but this isn't a - problem since the `from_dict()` method ignores unneeded values. - - Not handling more complex edge cases for now such as nested types i.e Union[List[List[...]]] - """ - - def conform_dict(inner_d: dict, inner_config: t.Type[BaseConfig]): - # Catch edge cases (i.e. 
Dict[str, ...]) where underlying type is not a concrete Class, - # causing 'issubclass() arg 1 must be a class' errors, return False - def is_subclass(instance, class_type) -> bool: - try: - return issubclass(instance, class_type) - except Exception: - return False - - dd = inner_d.copy() - for field in fields(inner_config): - f_type = field.type - # Handle the case where the type of a value if a Union (possibly optional) - if t.get_origin(f_type) is t.Union: - union_values = t.get_args(f_type) - # handle List types - union_values = [ - t.get_args(u)[0] if t.get_origin(u) is list else u for u in union_values - ] - # Ignore injected NoneType when optional - concrete_union_values = [v for v in union_values if not is_subclass(v, type(None))] - dataclass_union_values = [v for v in concrete_union_values if is_dataclass(v)] - non_dataclass_union_values = [ - v for v in concrete_union_values if not is_dataclass(v) - ] - if not dataclass_union_values: - continue - # Check if the key for this field already exists in the dictionary, - # if so it might map to one of these non dataclass fields and this - # can't be enforced - if non_dataclass_union_values and field.name in dd: - continue - if len(dataclass_union_values) > 1: - logger.warning( - "more than one dataclass type possible for field {}, " - "not extracting: {}".format(field.name, ", ".join(dataclass_union_values)) - ) - continue - f_type = dataclass_union_values[0] - origin = t.get_origin(f_type) - if origin: - f_type = origin - if is_subclass(f_type, BaseConfig): - dd[field.name] = conform_dict(inner_d=dd, inner_config=f_type) - return dd - - adjusted_dict = conform_dict(inner_d=flat_data, inner_config=config) - return config.from_dict(adjusted_dict, apply_name_overload=False) - - -def extract_configs( - data: dict, - extras: t.Optional[t.Dict[str, t.Type[BaseConfig]]] = None, - validate: t.Optional[t.List[t.Type[BaseConfig]]] = None, - add_defaults: bool = True, -) -> t.Dict[str, BaseConfig]: - """ - Extract all common configs used across CLI command and validate that any - command-specific configs have all their needed information from the Click - options that are passed in during invocation. 
- """ - validate = validate if validate else [] - res = ( - { - "read_config": extract_config(flat_data=data, config=CliReadConfig), - "partition_config": extract_config(flat_data=data, config=CliPartitionConfig), - "embedding_config": extract_config(flat_data=data, config=CliEmbeddingConfig), - "chunking_config": extract_config(flat_data=data, config=CliChunkingConfig), - "processor_config": extract_config(flat_data=data, config=CliProcessorConfig), - "permissions_config": extract_config(flat_data=data, config=CliPermissionsConfig), - "retry_strategy_config": extract_config(flat_data=data, config=CliRetryStrategyConfig), - } - if add_defaults - else {} - ) - if extras: - for k, conf in extras.items(): - try: - res[k] = extract_config(flat_data=data, config=conf) - except Exception as e: - logger.error(f"failed to extract config from {conf.__name__}") - raise e - for v in validate: - try: - extract_config(flat_data=data, config=v) - except Exception as e: - raise Exception(f"failed to validate config {v.__name__}") from e - - return res - - -def add_options( - cmd: click.Command, extras: t.List[t.Type[CliConfig]], is_src: bool = True -) -> click.Command: - configs: t.List[t.Type[CliConfig]] = ( - [ - CliPartitionConfig, - CliReadConfig, - CliEmbeddingConfig, - CliChunkingConfig, - CliProcessorConfig, - CliPermissionsConfig, - CliRetryStrategyConfig, - ] - if is_src - else [] - ) - # make sure what's unique to this cmd appears first - extras.extend(configs) - for config in extras: - try: - config.add_cli_options(cmd=cmd) - except ValueError as e: - raise ValueError(f"failed to set configs from {config.__name__}: {e}") - return cmd - - -class Group(click.Group): - def parse_args(self, ctx, args): - """ - This allows for subcommands to be called with the --help flag without breaking - if parent command is missing any of its required parameters - """ - - try: - return super().parse_args(ctx, args) - except click.MissingParameter: - if "--help" not in args: - raise - - # remove the required params so that help can display - for param in self.params: - param.required = False - return super().parse_args(ctx, args) - - def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: - """ - Copy of the original click.Group format_commands() method but replacing - 'Commands' -> 'Destinations' - """ - commands = [] - for subcommand in self.list_commands(ctx): - cmd = self.get_command(ctx, subcommand) - # What is this, the tool lied about a command. 
Ignore it - if cmd is None: - continue - if cmd.hidden: - continue - - commands.append((subcommand, cmd)) - - # allow for 3 times the default spacing - if len(commands): - if formatter.width: - limit = formatter.width - 6 - max(len(cmd[0]) for cmd in commands) - else: - limit = -6 - max(len(cmd[0]) for cmd in commands) - - rows = [] - for subcommand, cmd in commands: - help = cmd.get_short_help_str(limit) - rows.append((subcommand, help)) - - if rows: - with formatter.section(_("Destinations")): - formatter.write_dl(rows) diff --git a/unstructured/ingest/connector/__init__.py b/unstructured/ingest/connector/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/airtable.py b/unstructured/ingest/connector/airtable.py deleted file mode 100644 index 27669d4a3..000000000 --- a/unstructured/ingest/connector/airtable.py +++ /dev/null @@ -1,309 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from pyairtable import Api - - -@dataclass -class AirtableAccessConfig(AccessConfig): - personal_access_token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleAirtableConfig(BaseConnectorConfig): - """Connector config where: - auth_token is the authentication token to authenticate into Airtable. - - Check https://support.airtable.com/docs/airtable-api-key-deprecation-notice - for more info on authentication. - """ - - access_config: AirtableAccessConfig - list_of_paths: t.Optional[str] = None - - -@dataclass -class AirtableTableMeta: - """Metadata specifying a table id, a base id which the table is stored in, - and an t.Optional view id in case particular rows and fields are to be ingested""" - - base_id: str - table_id: str - view_id: t.Optional[str] = None - - -@dataclass -class AirtableIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates an Airtable connection object - to fetch each document, rather than creating a it for each thread. 
- """ - - connector_config: SimpleAirtableConfig - table_meta: AirtableTableMeta - registry_name: str = "airtable" - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / self.table_meta.base_id - / f"{self.table_meta.table_id}.csv" - ).resolve() - - @property - def _output_filename(self): - """Create output file path based on output directory, base id, and table id""" - output_file = f"{self.table_meta.table_id}.json" - return Path(self.processor_config.output_dir) / self.table_meta.base_id / output_file - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "base_id": self.table_meta.base_id, - "table_id": self.table_meta.table_id, - "view_id": self.table_meta.view_id, - } - - @property - def version(self) -> t.Optional[str]: - return None - - @requires_dependencies(["pyairtable"], extras="airtable") - def _query_table(self): - from pyairtable import Api - - api = Api(self.connector_config.access_config.personal_access_token) - table = api.table(self.table_meta.base_id, self.table_meta.table_id) - table_url = table.url - rows = table.all( - view=self.table_meta.view_id, - ) - return rows, table_url - - @SourceConnectionNetworkError.wrap - def _get_table_rows(self): - rows, table_url = self._query_table() - - if len(rows) == 0: - logger.info("Empty document, retrieved table but it has no rows.") - return rows, table_url - - def update_source_metadata(self, **kwargs): - """Gets file metadata from the current table.""" - - rows, table_url = kwargs.get("rows_tuple", self._get_table_rows()) - if rows is None or len(rows) < 1: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - dates = [r.get("createdTime", "") for r in rows] - dates.sort() - - date_created = datetime.strptime( - dates[0], - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - date_modified = datetime.strptime( - dates[-1], - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - source_url=table_url, - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["pandas"]) - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - import pandas as pd - - rows, table_url = self._get_table_rows() - self.update_source_metadata(rows_tuple=(rows, table_url)) - if rows is None: - raise ValueError( - "Failed to retrieve rows from table " - f"{self.table_meta.base_id}/{self.table_meta.table_id}. Check logs", - ) - # NOTE: Might be a good idea to add pagination for large tables - df = pd.DataFrame.from_dict( - [row["fields"] for row in rows], - ).sort_index(axis=1) - - self.document = df.to_csv() - self.filename.parent.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -airtable_id_prefixes = ["app", "tbl", "viw"] - - -def raise_airtable_path_error(piece): - if any(piece[:3] == prefix for prefix in airtable_id_prefixes): - raise ( - ValueError( - "Path components are not correctly ordered.\ - Valid path structures: \ - - base_id/table_id/view_id , \ - - base_id/table_id, \ - - base_id .\ - It is also possible to leave --airtable-list-of-paths \ - argument empty (this will ingest everything).", - ) - ) - else: - raise ( - ValueError( - """Path components are not valid Airtable ids. 
- base_id should look like: appAbcDeF1ghijKlm, - table_id should look like: tblAbcDeF1ghijKlm, - view_id should look like: viwAbcDeF1ghijKlm""", - ) - ) - - -def check_path_validity(path): - pieces = path.split("/") - assert ( - 1 <= len(pieces) <= 3 - ), "Path should be composed of between 1-3 \ - components (base_id, table_id, view_id)." - - for i, piece in enumerate(pieces): - try: - assert piece[:3] == airtable_id_prefixes[i] - except AssertionError: - raise_airtable_path_error(piece) - - -@dataclass -class AirtableSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches tables or views from an Airtable org.""" - - connector_config: SimpleAirtableConfig - _api: t.Optional["Api"] = field(init=False, default=None) - - @property - def api(self): - if self._api is None: - self._api = Api(self.connector_config.access_config.personal_access_token) - return self._api - - @api.setter - def api(self, api: "Api"): - self._api = api - - def check_connection(self): - import requests - - try: - self.api.request(method="HEAD", url=self.api.build_url("meta", "bases")) - except requests.HTTPError as http_error: - logger.error(f"failed to validate connection: {http_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - @requires_dependencies(["pyairtable"], extras="airtable") - def initialize(self): - from pyairtable import Api - - self.base_ids_to_fetch_tables_from = [] - if self.connector_config.list_of_paths: - self.list_of_paths = self.connector_config.list_of_paths.split() - - self.api = Api(self.connector_config.access_config.personal_access_token) - - @requires_dependencies(["pyairtable"], extras="airtable") - def use_all_bases(self): - from pyairtable.metadata import get_api_bases - - self.base_ids_to_fetch_tables_from = [ - base["id"] for base in get_api_bases(self.api)["bases"] - ] - - @requires_dependencies(["pyairtable"], extras="airtable") - def fetch_table_ids(self): - from pyairtable.metadata import get_base_schema - - bases = [ - (base_id, self.api.base(base_id)) for base_id in self.base_ids_to_fetch_tables_from - ] - - metadata_for_each_base = [ - (base_id, get_base_schema(base)["tables"]) for base_id, base in bases - ] - - baseid_tableid_viewid_tuples = [ - (base_id, table["id"], None) - for base_id, base_metadata in metadata_for_each_base - for table in base_metadata - ] - - return baseid_tableid_viewid_tuples - - def get_ingest_docs(self): - """Fetches documents in an Airtable org.""" - - # When no list of paths provided, the connector ingests everything. - if not self.connector_config.list_of_paths: - self.use_all_bases() - baseid_tableid_viewid_tuples = self.fetch_table_ids() - - # When there is a list of paths, the connector checks the validity - # of the paths, and fetches table_ids to be ingested, based on the paths. 
- else: - self.paths = self.connector_config.list_of_paths.split() - self.paths = [path.strip("/") for path in self.paths] - - [check_path_validity(path) for path in self.paths] - - self.base_ids_to_fetch_tables_from = [] - baseid_tableid_viewid_tuples = [] - - for path in self.paths: - components = path.split("/") - if len(components) == 1: # only a base_id is provided - self.base_ids_to_fetch_tables_from.append(components[0]) - elif len(components) == 2: # a base_id and a table_id are provided - baseid_tableid_viewid_tuples.append((components[0], components[1], None)) - elif len(components) == 3: # a base_id, table_id, and a view_id are provided - baseid_tableid_viewid_tuples.append( - (components[0], components[1], components[2]), - ) - - baseid_tableid_viewid_tuples += self.fetch_table_ids() - return [ - AirtableIngestDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - table_meta=AirtableTableMeta(base_id, table_id, view_id), - ) - for base_id, table_id, view_id in baseid_tableid_viewid_tuples - ] diff --git a/unstructured/ingest/connector/astradb.py b/unstructured/ingest/connector/astradb.py deleted file mode 100644 index 2642ea191..000000000 --- a/unstructured/ingest/connector/astradb.py +++ /dev/null @@ -1,238 +0,0 @@ -import copy -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured import __name__ as integration_name -from unstructured.__version__ import __version__ as integration_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from astrapy.db import AstraDB, AstraDBCollection - -NON_INDEXED_FIELDS = ["metadata._node_content", "content"] - - -@dataclass -class AstraDBAccessConfig(AccessConfig): - token: str = enhanced_field(sensitive=True) - api_endpoint: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleAstraDBConfig(BaseConnectorConfig): - access_config: AstraDBAccessConfig - collection_name: str - namespace: t.Optional[str] = None - - -@dataclass -class AstraDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleAstraDBConfig - metadata: t.Dict[str, str] = field(default_factory=dict) - registry_name: str = "astradb" - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / self.connector_config.collection_name - / f"{self.metadata['_id']}.txt" - ).resolve() - - @property - def _output_filename(self): - return ( - Path(self.processor_config.output_dir) - / self.connector_config.collection_name - / f"{self.metadata['_id']}.json" - ).resolve() - - def update_source_metadata(self, **kwargs): - if not self.metadata: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["astrapy"], 
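The fetch inside `AirtableIngestDoc._query_table()` and `get_file()` above, pulled out as a standalone sketch (same pyairtable and pandas calls as the hunk; the token and ids are placeholders):

```python
import pandas as pd
from pyairtable import Api

api = Api("pat-...")  # AirtableAccessConfig.personal_access_token
table = api.table("appAbcDeF1ghijKlm", "tblAbcDeF1ghijKlm")
rows = table.all(view=None)  # pass "viwAbcDeF1ghijKlm" to restrict rows/fields

# One CSV per table, columns sorted, exactly as get_file() wrote it out.
df = pd.DataFrame.from_dict([row["fields"] for row in rows]).sort_index(axis=1)
print(df.to_csv())
```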
extras="astradb") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - flattened_dict = flatten_dict(dictionary=self.metadata) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - with open(self.filename, "w") as f: - f.write(concatenated_values) - - -@dataclass -class AstraDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleAstraDBConfig - _astra_db: t.Optional["AstraDB"] = field(init=False, default=None) - _astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None) - - @property - @requires_dependencies(["astrapy"], extras="astradb") - def astra_db_collection(self) -> "AstraDBCollection": - if self._astra_db_collection is None: - from astrapy.db import AstraDB - - # Build the Astra DB object. - # caller_name/version for Astra DB tracking - self._astra_db = AstraDB( - api_endpoint=self.connector_config.access_config.api_endpoint, - token=self.connector_config.access_config.token, - namespace=self.connector_config.namespace, - caller_name=integration_name, - caller_version=integration_version, - ) - - # Create and connect to the collection - self._astra_db_collection = self._astra_db.collection( - collection_name=self.connector_config.collection_name, - ) - return self._astra_db_collection # type: ignore - - @requires_dependencies(["astrapy"], extras="astradb") - @SourceConnectionError.wrap # type: ignore - def initialize(self): - _ = self.astra_db_collection - - @requires_dependencies(["astrapy"], extras="astradb") - def check_connection(self): - try: - _ = self.astra_db_collection - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["astrapy"], extras="astradb") - def get_ingest_docs(self): # type: ignore - # Perform the find operation - astra_docs = list(self.astra_db_collection.paginated_find()) - - doc_list = [] - for record in astra_docs: - doc = AstraDBIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - metadata=record, - ) - - doc.update_source_metadata() - - doc_list.append(doc) - - return doc_list - - -@dataclass -class AstraDBWriteConfig(WriteConfig): - embedding_dimension: int - requested_indexing_policy: t.Optional[t.Dict[str, t.Any]] = None - batch_size: int = 20 - - -@dataclass -class AstraDBDestinationConnector(BaseDestinationConnector): - write_config: AstraDBWriteConfig - connector_config: SimpleAstraDBConfig - _astra_db: t.Optional["AstraDB"] = field(init=False, default=None) - _astra_db_collection: t.Optional["AstraDBCollection"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _astra_db_collection variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - - if hasattr(self_cp, "_astra_db_collection"): - setattr(self_cp, "_astra_db_collection", None) - - return _asdict(self_cp, **kwargs) - - @property - @requires_dependencies(["astrapy"], extras="astradb") - def astra_db_collection(self) -> "AstraDBCollection": - if self._astra_db_collection is None: - from astrapy.db import AstraDB - - collection_name = self.connector_config.collection_name - 
embedding_dimension = self.write_config.embedding_dimension - - # If the user has requested an indexing policy, pass it to the Astra DB - requested_indexing_policy = self.write_config.requested_indexing_policy - options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None - - # caller_name/version for Astra DB tracking - self._astra_db = AstraDB( - api_endpoint=self.connector_config.access_config.api_endpoint, - token=self.connector_config.access_config.token, - namespace=self.connector_config.namespace, - caller_name=integration_name, - caller_version=integration_version, - ) - - # Create and connect to the newly created collection - self._astra_db_collection = self._astra_db.create_collection( - collection_name=collection_name, - dimension=embedding_dimension, - options=options, - ) - return self._astra_db_collection - - @requires_dependencies(["astrapy"], extras="astradb") - @DestinationConnectionError.wrap - def initialize(self): - _ = self.astra_db_collection - - @requires_dependencies(["astrapy"], extras="astradb") - def check_connection(self): - try: - _ = self.astra_db_collection - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Inserting / updating {len(elements_dict)} documents to Astra DB.") - - astra_batch_size = self.write_config.batch_size - - for batch in batch_generator(elements_dict, astra_batch_size): - self._astra_db_collection.insert_many(batch) - - def normalize_dict(self, element_dict: dict) -> dict: - return { - "$vector": element_dict.pop("embeddings", None), - "content": element_dict.pop("text", None), - "metadata": element_dict, - } diff --git a/unstructured/ingest/connector/azure_cognitive_search.py b/unstructured/ingest/connector/azure_cognitive_search.py deleted file mode 100644 index fc932eb5e..000000000 --- a/unstructured/ingest/connector/azure_cognitive_search.py +++ /dev/null @@ -1,142 +0,0 @@ -import json -import typing as t -import uuid -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from azure.search.documents import SearchClient - - -@dataclass -class AzureCognitiveSearchAccessConfig(AccessConfig): - key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleAzureCognitiveSearchStorageConfig(BaseConnectorConfig): - endpoint: str - access_config: AzureCognitiveSearchAccessConfig - - -@dataclass -class AzureCognitiveSearchWriteConfig(WriteConfig): - index: str - - -@dataclass -class AzureCognitiveSearchDestinationConnector(BaseDestinationConnector): - write_config: AzureCognitiveSearchWriteConfig - connector_config: SimpleAzureCognitiveSearchStorageConfig - _client: t.Optional["SearchClient"] = field(init=False, default=None) - - @requires_dependencies(["azure.search"], extras="azure-cognitive-search") - def generate_client(self) -> "SearchClient": - from azure.core.credentials import AzureKeyCredential - from azure.search.documents import SearchClient - - # Create a client - credential = 
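The write path of the Astra DB destination above, stripped to the astrapy calls that actually appear in the hunk (endpoint, token, and dimension are placeholders):

```python
from astrapy.db import AstraDB

db = AstraDB(
    api_endpoint="https://<db-id>-<region>.apps.astra.datastax.com",
    token="AstraCS:...",
    namespace=None,
)
collection = db.create_collection(collection_name="elements", dimension=384)

# normalize_dict() shapes each element like this before insert_many():
docs = [{"$vector": [0.1] * 384, "content": "hello world", "metadata": {"type": "Title"}}]
collection.insert_many(docs)
```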
AzureKeyCredential(self.connector_config.access_config.key) - return SearchClient( - endpoint=self.connector_config.endpoint, - index_name=self.write_config.index, - credential=credential, - ) - - @property - def client(self) -> "SearchClient": - if self._client is None: - self._client = self.generate_client() - return self._client - - def check_connection(self): - try: - self.client.get_document_count() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - def conform_dict(self, data: dict) -> None: - """ - updates the dictionary that is from each Element being converted into a dict/json - into a dictionary that conforms to the schema expected by the - Azure Cognitive Search index - """ - from dateutil import parser # type: ignore - - data["id"] = str(uuid.uuid4()) - - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = json.dumps(points) - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - data["metadata"]["data_source"]["record_locator"] = json.dumps(record_locator) - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - if links := data.get("metadata", {}).get("links"): - data["metadata"]["links"] = [json.dumps(link) for link in links] - if last_modified := data.get("metadata", {}).get("last_modified"): - data["metadata"]["last_modified"] = parser.parse(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = parser.parse(date_created).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = parser.parse(date_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = parser.parse( - date_processed, - ).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - @requires_dependencies(["azure"], extras="azure-cognitive-search") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - import azure.core.exceptions - - logger.info( - f"writing {len(elements_dict)} documents to destination " - f"index at {self.write_config.index}", - ) - try: - results = self.client.upload_documents(documents=elements_dict) - - except azure.core.exceptions.HttpResponseError as http_error: - raise WriteError(f"http error: {http_error}") from http_error - errors = [] - success = [] - for result in results: - if result.succeeded: - success.append(result) - else: - errors.append(result) - logger.debug(f"results: {len(success)} successes, {len(errors)} failures") - if errors: - raise WriteError( - ", ".join( - [ - f"{error.key}: [{error.status_code}] {error.error_message}" - for error in errors - ], - ), - ) diff --git 
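Among the `conform_dict()` rewrites above, the timestamp normalisation is the subtle one; on its own (python-dateutil, which the method imports):

```python
from dateutil import parser

last_modified = "2024-10-15T11:01:34-04:00"
# Every date field is reparsed and reserialised to one canonical format before
# indexing. Note the literal "Z" is appended regardless of the original offset,
# just as in the removed code.
print(parser.parse(last_modified).strftime("%Y-%m-%dT%H:%M:%S.%fZ"))
# -> 2024-10-15T11:01:34.000000Z
```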
a/unstructured/ingest/connector/biomed.py b/unstructured/ingest/connector/biomed.py deleted file mode 100644 index 7371699e3..000000000 --- a/unstructured/ingest/connector/biomed.py +++ /dev/null @@ -1,313 +0,0 @@ -import os -import typing as t -import urllib.request -from dataclasses import dataclass -from ftplib import FTP, error_perm -from pathlib import Path - -import requests -from requests.adapters import HTTPAdapter - -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - validate_date_args, -) - -DOMAIN = "ftp.ncbi.nlm.nih.gov" -FTP_DOMAIN = f"ftp://{DOMAIN}" -PMC_DIR = "pub/pmc" -PDF_DIR = "oa_pdf" - - -@dataclass -class BiomedFileMeta: - ftp_path: str - download_filepath: str - output_filepath: str - - -@dataclass -class SimpleBiomedConfig(BaseConnectorConfig): - """Connector config where path is the FTP directory path and - id_, from_, until, format are API parameters.""" - - path: t.Optional[str] = None - # OA Web Service API Options - api_id: t.Optional[str] = None - api_from: t.Optional[str] = None - api_until: t.Optional[str] = None - max_request_time: int = 45 - - def validate_api_inputs(self): - valid = False - - if self.api_from: - valid = validate_date_args(self.api_from) - - if self.api_until: - valid = validate_date_args(self.api_until) - - return valid - - def __post_init__(self): - self.is_file = False - self.is_dir = False - self.is_api = False - - if not self.path: - is_valid = self.validate_api_inputs() - if not is_valid: - raise ValueError( - "Path argument or at least one of the " - "OA Web Service arguments MUST be provided.", - ) - - self.is_api = True - else: - self.path = self.path.strip("/") - is_valid = self.path.lower().startswith(PDF_DIR) - - if not is_valid: - raise ValueError(f"Path MUST start with {PDF_DIR}") - - ftp = FTP(DOMAIN) - ftp.login() - - path = Path(PMC_DIR) / self.path - response = "" - try: - if path.suffix == ".pdf": - response = ftp.cwd(str(path.parent)) - self.is_file = True - else: - response = ftp.cwd(str(path)) - except error_perm as exc: - if "no such file or directory" in exc.args[0].lower(): - raise ValueError(f"The path: {path} is not valid.") - elif "not a directory" in exc.args[0].lower(): - self.is_file = True - elif "command successful" in response: - self.is_dir = True - else: - raise ValueError( - "Something went wrong when validating the path: {path}.", - ) - - -@dataclass -class BiomedIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleBiomedConfig - file_meta: BiomedFileMeta - registry_name: str = "biomed" - - @property - def filename(self): - return Path(self.file_meta.download_filepath).resolve() # type: ignore - - @property - def _output_filename(self): - return Path(f"{self.file_meta.output_filepath}.json").resolve() - - def cleanup_file(self): - if ( - not self.read_config.preserve_downloads - and self.filename.is_file() - and not self.read_config.download_only - ): - logger.debug(f"Cleaning up {self}") - Path.unlink(self.filename) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - download_path = self.file_meta.download_filepath # type: ignore - dir_ = Path(os.path.dirname(download_path)) # type: ignore - if not dir_.is_dir(): - 
logger.debug(f"Creating directory: {dir_}") - - if dir_: - dir_.mkdir(parents=True, exist_ok=True) - self._retrieve() - logger.debug(f"File downloaded: {self.file_meta.download_filepath}") - - @SourceConnectionNetworkError.wrap - def _retrieve(self): - urllib.request.urlretrieve( - self.file_meta.ftp_path, # type: ignore - self.file_meta.download_filepath, - ) - - -class BiomedSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching documents from Biomedical literature FTP directory""" - - connector_config: SimpleBiomedConfig - - def get_base_endpoints_url(self) -> str: - endpoint_url = "https://www.ncbi.nlm.nih.gov/pmc/utils/oa/oa.fcgi?format=pdf" - - if self.connector_config.api_id: - endpoint_url += f"&id={self.connector_config.api_id}" - - if self.connector_config.api_from: - endpoint_url += f"&from={self.connector_config.api_from}" - - if self.connector_config.api_until: - endpoint_url += f"&until={self.connector_config.api_until}" - - return endpoint_url - - def _list_objects_api(self) -> t.List[BiomedFileMeta]: - from bs4 import BeautifulSoup - - def urls_to_metadata(urls): - files = [] - for url in urls: - parts = url.split(PDF_DIR) - if len(parts) > 1: - local_path = parts[1].strip("/") - files.append( - BiomedFileMeta( - ftp_path=url, - download_filepath=(Path(self.read_config.download_dir) / local_path) - .resolve() - .as_posix(), - output_filepath=(Path(self.processor_config.output_dir) / local_path) - .resolve() - .as_posix(), - ), - ) - - return files - - files: t.List[BiomedFileMeta] = [] - - endpoint_url = self.get_base_endpoints_url() - - while endpoint_url: - session = requests.Session() - adapter = HTTPAdapter() - session.mount("http://", adapter) - session.mount("https://", adapter) - response = self._get_request(session=session, endpoint_url=endpoint_url) - soup = BeautifulSoup(response.content, features="lxml") - urls = [link["href"] for link in soup.find_all("link")] - - if not urls: - return files - - endpoint_url = urls[-1] if "resumptiontoken" in urls[-1].lower() else None - if endpoint_url: - urls = urls[:-1] - - files.extend(urls_to_metadata(urls)) - - return files - - @SourceConnectionNetworkError.wrap - def _get_request(self, session: requests.Session, endpoint_url: str) -> requests.Response: - return session.get(endpoint_url, timeout=self.connector_config.max_request_time) - - def _list_objects(self) -> t.List[BiomedFileMeta]: - files = [] - - # Conform to mypy, null check performed elsewhere. 
- # Wouldn't be in this method unless self.config.path exists - path: str = self.connector_config.path if self.connector_config.path else "" - - def traverse(path, download_dir, output_dir): - full_path = Path(PMC_DIR) / path - logger.debug(f"Traversing directory: {full_path}") - - ftp = FTP(DOMAIN) - ftp.login() - - try: - response = ftp.cwd(str(full_path)) - except error_perm: - raise ValueError(f"{full_path} is not a valid directory.") - - if "command successful" in response.lower(): - sub_paths = [path / p for p in ftp.nlst()] - - if not sub_paths: - return - - ext = Path(sub_paths[0]).suffix - if ext: - for sub_path in sub_paths: - ftp_path = f"{FTP_DOMAIN}/{PMC_DIR}/{sub_path}" - local_path = "/".join(str(sub_path).split("/")[1:]) - files.append( - BiomedFileMeta( - ftp_path=ftp_path, - download_filepath=(Path(self.read_config.download_dir) / local_path) - .resolve() - .as_posix(), - output_filepath=( - Path(self.processor_config.output_dir) / local_path - ) - .resolve() - .as_posix(), - ), - ) - - else: - for sub_path in sub_paths: - traverse(sub_path, download_dir, output_dir) - - else: - raise ValueError(f"{full_path} is not a valid directory.") - - ftp_path = f"{FTP_DOMAIN}/{PMC_DIR}/{self.connector_config.path}" - if self.connector_config.is_file: - local_path = "/".join(path.split("/")[1:]) - return [ - BiomedFileMeta( - ftp_path=ftp_path, - download_filepath=(Path(self.read_config.download_dir) / local_path) - .resolve() - .as_posix(), - output_filepath=(Path(self.processor_config.output_dir) / local_path) - .resolve() - .as_posix(), - ), - ] - else: - traverse( - Path(path), - Path(self.read_config.download_dir), - Path(self.processor_config.output_dir), - ) - - return files - - def initialize(self): - pass - - def check_connection(self): - resp = requests.head(self.get_base_endpoints_url()) - try: - resp.raise_for_status() - except requests.HTTPError as http_error: - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - def get_ingest_docs(self): - files = self._list_objects_api() if self.connector_config.is_api else self._list_objects() - return [ - BiomedIngestDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - file_meta=file, - ) - for file in files - ] diff --git a/unstructured/ingest/connector/chroma.py b/unstructured/ingest/connector/chroma.py deleted file mode 100644 index 547b988a2..000000000 --- a/unstructured/ingest/connector/chroma.py +++ /dev/null @@ -1,159 +0,0 @@ -import copy -import typing as t -import uuid -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from chromadb import Collection as ChromaCollection - - -@dataclass -class ChromaAccessConfig(AccessConfig): - settings: t.Optional[t.Dict[str, str]] = None - headers: t.Optional[t.Dict[str, str]] = None - - -@dataclass -class SimpleChromaConfig(BaseConnectorConfig): - access_config: ChromaAccessConfig - collection_name: str - path: t.Optional[str] = None - tenant: t.Optional[str] = "default_tenant" - database: t.Optional[str] = 
"default_database" - host: t.Optional[str] = None - port: t.Optional[int] = None - ssl: bool = False - - -@dataclass -class ChromaWriteConfig(WriteConfig): - batch_size: int = 100 - - -@dataclass -class ChromaDestinationConnector(BaseDestinationConnector): - write_config: ChromaWriteConfig - connector_config: SimpleChromaConfig - _collection: t.Optional["ChromaCollection"] = None - - @property - def chroma_collection(self): - if self._collection is None: - self._collection = self.create_collection() - return self._collection - - def initialize(self): - pass - - @DestinationConnectionError.wrap - def check_connection(self): - _ = self.chroma_collection - - def to_dict(self, **kwargs): - """ - The _collection variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle 'module' object - When serializing, remove it, meaning collection data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_collection"): - setattr(self_cp, "_collection", None) - return _asdict(self_cp, **kwargs) - - @requires_dependencies(["chromadb"], extras="chroma") - def create_collection(self) -> "ChromaCollection": - import chromadb - - if self.connector_config.path: - chroma_client = chromadb.PersistentClient( - path=self.connector_config.path, - settings=self.connector_config.settings, - tenant=self.connector_config.tenant, - database=self.connector_config.database, - ) - - elif self.connector_config.host and self.connector_config.port: - chroma_client = chromadb.HttpClient( - host=self.connector_config.host, - port=self.connector_config.port, - ssl=self.connector_config.ssl, - headers=self.connector_config.access_config.headers, - settings=self.connector_config.access_config.settings, - tenant=self.connector_config.tenant, - database=self.connector_config.database, - ) - else: - raise ValueError("Chroma connector requires either path or host and port to be set.") - - collection = chroma_client.get_or_create_collection( - name=self.connector_config.collection_name - ) - return collection - - @DestinationConnectionError.wrap - @requires_dependencies(["chromadb"], extras="chroma") - def upsert_batch(self, batch): - collection = self.chroma_collection - - try: - # Chroma wants lists even if there is only one element - # Upserting to prevent duplicates - collection.upsert( - ids=batch["ids"], - documents=batch["documents"], - embeddings=batch["embeddings"], - metadatas=batch["metadatas"], - ) - except Exception as e: - raise ValueError(f"chroma error: {e}") from e - - @staticmethod - def prepare_chroma_list(chunk: t.Tuple[t.Dict[str, t.Any]]) -> t.Dict[str, t.List[t.Any]]: - """Helper function to break a tuple of dicts into list of parallel lists for ChromaDb. 
- ({'id':1}, {'id':2}, {'id':3}) -> {'ids':[1,2,3]}""" - chroma_dict = {} - chroma_dict["ids"] = [x.get("id") for x in chunk] - chroma_dict["documents"] = [x.get("document") for x in chunk] - chroma_dict["embeddings"] = [x.get("embedding") for x in chunk] - chroma_dict["metadatas"] = [x.get("metadata") for x in chunk] - # Make sure all lists are of the same length - assert ( - len(chroma_dict["ids"]) - == len(chroma_dict["documents"]) - == len(chroma_dict["embeddings"]) - == len(chroma_dict["metadatas"]) - ) - return chroma_dict - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Inserting / updating {len(elements_dict)} documents to destination ") - - chroma_batch_size = self.write_config.batch_size - - for chunk in batch_generator(elements_dict, chroma_batch_size): - self.upsert_batch(self.prepare_chroma_list(chunk)) - - def normalize_dict(self, element_dict: dict) -> dict: - element_id = element_dict.get("element_id", str(uuid.uuid4())) - return { - "id": element_id, - "embedding": element_dict.pop("embeddings", None), - "document": element_dict.pop("text", None), - "metadata": flatten_dict( - element_dict, separator="-", flatten_lists=True, remove_none=True - ), - } diff --git a/unstructured/ingest/connector/clarifai.py b/unstructured/ingest/connector/clarifai.py deleted file mode 100644 index 1c1e06412..000000000 --- a/unstructured/ingest/connector/clarifai.py +++ /dev/null @@ -1,122 +0,0 @@ -import typing as t -import uuid -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from clarifai.client.input import Inputs - - -@dataclass -class ClarifaiAccessConfig(AccessConfig): - api_key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleClarifaiConfig(BaseConnectorConfig): - access_config: ClarifaiAccessConfig - app_id: str - user_id: str - dataset_id: t.Optional[str] = None - - -@dataclass -class ClarifaiWriteConfig(WriteConfig): - batch_size: int = 50 - - -@dataclass -class ClarifaiDestinationConnector(BaseDestinationConnector): - write_config: ClarifaiWriteConfig - connector_config: SimpleClarifaiConfig - _client: t.Optional["Inputs"] = field(init=False, default=None) - - @property - @requires_dependencies(["clarifai"], extras="clarifai") - def client(self) -> "Inputs": - if self._client is None: - from clarifai.client.input import Inputs - - access_conf = self.connector_config.access_config - try: - if access_conf.api_key is not None: - clarifai_pat = access_conf.api_key - except Exception as e: - raise (f"please provide clarifai PAT key : {e}") - - self._client = Inputs( - app_id=self.connector_config.app_id, - user_id=self.connector_config.user_id, - pat=clarifai_pat, - ) - return self._client - - @requires_dependencies(["clarifai"], extras="clarifai") - @DestinationConnectionError.wrap - def initialize(self): - _ = self.client - - def check_connection(self): - try: - _ = [inp for inp in self.client.list_inputs(page_no=1, per_page=1)] # noqa: C416 - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise DestinationConnectionError(f"failed to 
validate connection: {e}") - - def normalize_dict(self, element_dict: dict) -> dict: - """Modifying schema of the dict in order to compile with clarifai input formats""" - return { - "input_id": str(uuid.uuid4().hex), - "text": element_dict.pop("text", None), - "metadata": { - **flatten_dict( - element_dict, - separator="_", - flatten_lists=True, - remove_none=True, - ), - }, - } - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - from google.protobuf.struct_pb2 import Struct - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"app {self.connector_config.app_id} " - ) - try: - batch_size = self.write_config.batch_size - for idx in range(0, len(elements_dict), batch_size): - batch_dict = elements_dict[idx : batch_size + idx] - input_batch = [] - for elem in batch_dict: - meta_struct = Struct() - meta_struct.update(elem["metadata"]) - input_batch.append( - self._client.get_text_input( - input_id=elem["input_id"], - raw_text=elem["text"], - dataset_id=self.connector_config.dataset_id, - metadata=meta_struct, - ) - ) - result_id = self._client.upload_inputs(inputs=input_batch) - logger.debug( - f"Input posted successfully into {self.connector_config.app_id}. \ - Result id: {result_id}" - ) - - except Exception as e: - raise e diff --git a/unstructured/ingest/connector/confluence.py b/unstructured/ingest/connector/confluence.py deleted file mode 100644 index 4e1369349..000000000 --- a/unstructured/ingest/connector/confluence.py +++ /dev/null @@ -1,285 +0,0 @@ -import math -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path - -import requests - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from atlassian import Confluence - - -@dataclass -class ConfluenceAccessConfig(AccessConfig): - api_token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleConfluenceConfig(BaseConnectorConfig): - """Connector config where: - user_email is the email to authenticate into Confluence Cloud, - api_token is the api token to authenticate into Confluence Cloud, - and url is the URL pointing to the Confluence Cloud instance. - - Check https://developer.atlassian.com/cloud/confluence/basic-auth-for-rest-apis/ - for more info on the api_token. - """ - - user_email: str - access_config: ConfluenceAccessConfig - url: str - max_num_of_spaces: int = 500 - max_num_of_docs_from_each_space: int = 100 - spaces: t.List[str] = field(default_factory=list) - - -@dataclass -class ConfluenceDocumentMeta: - """Metadata specifying: - id for the confluence space that the document locates in, - and the id of document that is being reached to. 
- """ - - space_id: str - document_id: str - - -def scroll_wrapper(func): - def wrapper(*args, **kwargs): - """Wraps a function to obtain scroll functionality.""" - number_of_items_to_fetch = kwargs["number_of_items_to_fetch"] - del kwargs["number_of_items_to_fetch"] - - kwargs["limit"] = min(100, number_of_items_to_fetch) - kwargs["start"] = kwargs.get("start", 0) - - all_results = [] - num_iterations = math.ceil(number_of_items_to_fetch / kwargs["limit"]) - - for _ in range(num_iterations): - response = func(*args, **kwargs) - if isinstance(response, list): - all_results += func(*args, **kwargs) - elif isinstance(response, dict): - all_results += func(*args, **kwargs)["results"] - - kwargs["start"] += kwargs["limit"] - - return all_results[:number_of_items_to_fetch] - - return wrapper - - -@dataclass -class ConfluenceIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing). - - Current implementation creates a Confluence connection object - to fetch each doc, rather than creating a it for each thread. - """ - - connector_config: SimpleConfluenceConfig - document_meta: ConfluenceDocumentMeta - registry_name: str = "confluence" - - # TODO: remove one of filename or _tmp_download_file, using a wrapper - @property - def filename(self): - if not self.read_config.download_dir: - return None - return ( - Path(self.read_config.download_dir) - / self.document_meta.space_id - / f"{self.document_meta.document_id}.html" - ).resolve() - - @property - def _output_filename(self): - """Create output file path based on output directory, space id and document id.""" - output_file = f"{self.document_meta.document_id}.json" - return Path(self.processor_config.output_dir) / self.document_meta.space_id / output_file - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "url": self.connector_config.url, - "page_id": self.document_meta.document_id, - } - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_page(self): - from atlassian import Confluence - from atlassian.errors import ApiError - - try: - confluence = Confluence( - self.connector_config.url, - username=self.connector_config.user_email, - password=self.connector_config.access_config.api_token, - ) - result = confluence.get_page_by_id( - page_id=self.document_meta.document_id, - expand="history.lastUpdated,version,body.view", - ) - except ApiError as e: - logger.error(e) - return None - return result - - def update_source_metadata(self, **kwargs): - """Fetches file metadata from the current page.""" - page = kwargs.get("page", self._get_page()) - if page is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - document_history = page["history"] - date_created = datetime.strptime( - document_history["createdDate"], - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - if last_updated := document_history.get("lastUpdated", {}).get("when", ""): - date_modified = datetime.strptime( - last_updated, - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - else: - date_modified = date_created - version = page["version"]["number"] - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - version=version, - source_url=page["_links"].get("self", None), - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["atlassian"], extras="confluence") - @BaseSingleIngestDoc.skip_if_file_exists - def 
get_file(self): - # TODO: instead of having a separate connection object for each doc, - # have a separate connection object for each process - - result = self._get_page() - self.update_source_metadata(page=result) - if result is None: - raise ValueError(f"Failed to retrieve page with ID {self.document_meta.document_id}") - self.document = result["body"]["view"]["value"] - self.filename.parent.mkdir(parents=True, exist_ok=True) - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.document) - - -@dataclass -class ConfluenceSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches body fields from all documents within all spaces in a Confluence Cloud instance.""" - - connector_config: SimpleConfluenceConfig - _confluence: t.Optional["Confluence"] = field(init=False, default=None) - - @property - def confluence(self) -> "Confluence": - from atlassian import Confluence - - if self._confluence is None: - self._confluence = Confluence( - url=self.connector_config.url, - username=self.connector_config.user_email, - password=self.connector_config.access_config.api_token, - ) - return self._confluence - - @requires_dependencies(["atlassian"], extras="Confluence") - def check_connection(self): - url = "rest/api/space" - try: - self.confluence.request(method="HEAD", path=url) - except requests.HTTPError as http_error: - logger.error(f"failed to validate connection: {http_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {http_error}") - - @requires_dependencies(["atlassian"], extras="Confluence") - def initialize(self): - self.list_of_spaces = None - if self.connector_config.spaces: - self.list_of_spaces = self.connector_config.spaces - if self.connector_config.max_num_of_spaces: - logger.warning( - """--confluence-list-of-spaces and --confluence-num-of-spaces cannot - be used at the same time. 
Connector will only fetch the - --confluence-list-of-spaces that you've provided.""", - ) - - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_space_ids(self): - """Fetches spaces in a confluence domain.""" - - get_spaces_with_scroll = scroll_wrapper(self.confluence.get_all_spaces) - - all_results = get_spaces_with_scroll( - number_of_items_to_fetch=self.connector_config.max_num_of_spaces, - ) - - space_ids = [space["key"] for space in all_results] - return space_ids - - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_docs_ids_within_one_space( - self, - space_id: str, - content_type: str = "page", - ): - get_pages_with_scroll = scroll_wrapper(self.confluence.get_all_pages_from_space) - results = get_pages_with_scroll( - space=space_id, - number_of_items_to_fetch=self.connector_config.max_num_of_docs_from_each_space, - content_type=content_type, - ) - - doc_ids = [(space_id, doc["id"]) for doc in results] - return doc_ids - - @requires_dependencies(["atlassian"], extras="Confluence") - def _get_doc_ids_within_spaces(self): - space_ids = self._get_space_ids() if not self.list_of_spaces else self.list_of_spaces - - doc_ids_all = [self._get_docs_ids_within_one_space(space_id=id) for id in space_ids] - - doc_ids_flattened = [ - (space_id, doc_id) - for doc_ids_space in doc_ids_all - for space_id, doc_id in doc_ids_space - ] - return doc_ids_flattened - - def get_ingest_docs(self): - """Fetches all documents in a confluence space.""" - doc_ids = self._get_doc_ids_within_spaces() - return [ - ConfluenceIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - document_meta=ConfluenceDocumentMeta(space_id, doc_id), - ) - for space_id, doc_id in doc_ids - ] diff --git a/unstructured/ingest/connector/databricks_volumes.py b/unstructured/ingest/connector/databricks_volumes.py deleted file mode 100644 index 5662d65cd..000000000 --- a/unstructured/ingest/connector/databricks_volumes.py +++ /dev/null @@ -1,137 +0,0 @@ -import copy -import json -import os -import typing as t -from dataclasses import dataclass, field -from io import BytesIO -from pathlib import PurePath - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from databricks.sdk import WorkspaceClient - - -@dataclass -class DatabricksVolumesAccessConfig(AccessConfig): - account_id: t.Optional[str] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - client_id: t.Optional[str] = None - client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - profile: t.Optional[str] = None - azure_workspace_resource_id: t.Optional[str] = None - azure_client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - azure_client_id: t.Optional[str] = None - azure_tenant_id: t.Optional[str] = None - azure_environment: t.Optional[str] = None - auth_type: t.Optional[str] = None - cluster_id: t.Optional[str] = None - google_credentials: t.Optional[str] = None - 
google_service_account: t.Optional[str] = None - - -@dataclass -class SimpleDatabricksVolumesConfig(BaseConnectorConfig): - access_config: DatabricksVolumesAccessConfig - host: t.Optional[str] = None - - -@dataclass -class DatabricksVolumesWriteConfig(WriteConfig): - volume: str - catalog: str - volume_path: t.Optional[str] = None - overwrite: bool = False - encoding: str = "utf-8" - schema: str = "default" - - @property - def path(self) -> str: - path = f"/Volumes/{self.catalog}/{self.schema}/{self.volume}" - if self.volume_path: - path = f"{path}/{self.volume_path}" - return path - - -@dataclass -class DatabricksVolumesDestinationConnector(BaseDestinationConnector): - write_config: DatabricksVolumesWriteConfig - connector_config: SimpleDatabricksVolumesConfig - _client: t.Optional["WorkspaceClient"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes") - def generate_client(self) -> "WorkspaceClient": - from databricks.sdk import WorkspaceClient - - return WorkspaceClient( - host=self.connector_config.host, **self.connector_config.access_config.to_dict() - ) - - @property - def client(self) -> "WorkspaceClient": - if self._client is None: - self._client = self.generate_client() - return self._client - - def check_connection(self): - try: - assert self.client.current_user.me().active - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - def write_dict( - self, - *args, - elements_dict: t.List[t.Dict[str, t.Any]], - filename: t.Optional[str] = None, - indent: int = 4, - encoding: str = "utf-8", - **kwargs, - ) -> None: - output_folder = self.write_config.path - output_folder = os.path.join(output_folder) # Make sure folder ends with file seperator - filename = ( - filename.strip(os.sep) if filename else filename - ) # Make sure filename doesn't begin with file seperator - output_path = str(PurePath(output_folder, filename)) if filename else output_folder - logger.debug(f"uploading content to {output_path}") - self.client.files.upload( - file_path=output_path, - contents=BytesIO(json.dumps(elements_dict).encode(encoding=self.write_config.encoding)), - overwrite=self.write_config.overwrite, - ) - - def get_elements_dict(self, docs: t.List[BaseSingleIngestDoc]) -> t.List[t.Dict[str, t.Any]]: - pass - - def write(self, docs: t.List[BaseSingleIngestDoc]) -> None: - for doc in docs: - file_path = doc.base_output_filename - filename = file_path if file_path else None - with open(doc._output_filename) as json_file: - logger.debug(f"uploading content from {doc._output_filename}") - json_list = json.load(json_file) - self.write_dict(elements_dict=json_list, filename=filename) diff --git a/unstructured/ingest/connector/delta_table.py b/unstructured/ingest/connector/delta_table.py deleted file mode 100644 index 1382ed05d..000000000 --- a/unstructured/ingest/connector/delta_table.py +++ /dev/null @@ -1,203 +0,0 @@ -import os -import typing as t -from dataclasses import dataclass -from datetime import datetime as dt -from multiprocessing import Process -from pathlib import Path - -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import 
( - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from deltalake import DeltaTable - - -@dataclass -class SimpleDeltaTableConfig(BaseConnectorConfig): - table_uri: t.Union[str, Path] - version: t.Optional[int] = None - storage_options: t.Optional[t.Dict[str, str]] = None - without_files: bool = False - - -@dataclass -class DeltaTableIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleDeltaTableConfig - uri: str - modified_date: str - created_at: str - registry_name: str = "delta-table" - - def uri_filename(self) -> str: - basename = os.path.basename(self.uri) - return os.path.splitext(basename)[0] - - @property - def filename(self): - return (Path(self.read_config.download_dir) / f"{self.uri_filename()}.csv").resolve() - - @property - def _output_filename(self): - """Create filename document id combined with a hash of the query to uniquely identify - the output file.""" - return Path(self.processor_config.output_dir) / f"{self.uri_filename()}.json" - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - self._output_filename.parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(["fsspec"], extras="delta-table") - def _get_fs_from_uri(self): - from fsspec.core import url_to_fs - - try: - fs, _ = url_to_fs(self.uri) - except ImportError as error: - raise ImportError( - f"uri {self.uri} may be associated with a filesystem that " - f"requires additional dependencies: {error}", - ) - return fs - - def update_source_metadata(self, **kwargs): - fs = kwargs.get("fs", self._get_fs_from_uri()) - version = ( - fs.checksum(self.uri) if fs.protocol != "gs" else fs.info(self.uri).get("etag", "") - ) - file_exists = fs.exists(self.uri) - self.source_metadata = SourceMetadata( - date_created=self.created_at, - date_modified=self.modified_date, - version=version, - source_url=self.uri, - exists=file_exists, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - fs = self._get_fs_from_uri() - self.update_source_metadata(fs=fs) - logger.info(f"using a {fs} filesystem to collect table data") - self._create_full_tmp_dir_path() - - df = self._get_df(filesystem=fs) - - logger.info(f"writing {len(df)} rows to {self.filename}") - df.to_csv(self.filename) - - @SourceConnectionNetworkError.wrap - def _get_df(self, filesystem): - import pyarrow.parquet as pq - - return pq.ParquetDataset(self.uri, filesystem=filesystem).read_pandas().to_pandas() - - -@dataclass -class DeltaTableSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleDeltaTableConfig - delta_table: t.Optional["DeltaTable"] = None - - def check_connection(self): - pass - - @requires_dependencies(["deltalake"], extras="delta-table") - def initialize(self): - from deltalake import DeltaTable - - self.delta_table = DeltaTable( - table_uri=self.connector_config.table_uri, - version=self.connector_config.version, - storage_options=self.connector_config.storage_options, - without_files=self.connector_config.without_files, - ) - rows = self.delta_table.to_pyarrow_dataset().count_rows() - if not rows > 0: - raise ValueError(f"no data found at {self.connector_config.table_uri}") - logger.info(f"processing {rows} rows of data") - 
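-    # NOTE: get_ingest_docs() below materializes one ingest doc per Parquet file
-    # in the table. A minimal sketch of the same lookup with the `deltalake`
-    # package, assuming a hypothetical local table path:
-    #
-    #     import os
-    #     from deltalake import DeltaTable
-    #
-    #     table = DeltaTable("path/to/table")  # hypothetical table_uri
-    #     actions = table.get_add_actions().to_pandas()
-    #     mod_dates = {
-    #         row["path"]: str(row["modification_time"]) for _, row in actions.iterrows()
-    #     }
-    #     for uri in table.file_uris():
-    #         print(uri, mod_dates[os.path.basename(uri)])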
-    def get_ingest_docs(self):
-        """Batches the results into distinct docs"""
-        if not self.delta_table:
-            raise ValueError("delta table was never initialized")
-        actions = self.delta_table.get_add_actions().to_pandas()
-        mod_date_dict = {
-            row["path"]: str(row["modification_time"]) for _, row in actions.iterrows()
-        }
-        created_at = dt.fromtimestamp(self.delta_table.metadata().created_time / 1000)
-        return [
-            DeltaTableIngestDoc(
-                connector_config=self.connector_config,
-                processor_config=self.processor_config,
-                read_config=self.read_config,
-                uri=uri,
-                modified_date=mod_date_dict[os.path.basename(uri)],
-                created_at=str(created_at),
-            )
-            for uri in self.delta_table.file_uris()
-        ]
-
-
-@dataclass
-class DeltaTableWriteConfig(WriteConfig):
-    drop_empty_cols: bool = False
-    mode: t.Literal["error", "append", "overwrite", "ignore"] = "error"
-    schema_mode: t.Optional[t.Literal["merge", "overwrite"]] = None
-    engine: t.Literal["pyarrow", "rust"] = "pyarrow"
-
-
-@dataclass
-class DeltaTableDestinationConnector(BaseDestinationConnector):
-    write_config: DeltaTableWriteConfig
-    connector_config: SimpleDeltaTableConfig
-
-    @requires_dependencies(["deltalake"], extras="delta-table")
-    def initialize(self):
-        pass
-
-    def check_connection(self):
-        pass
-
-    @requires_dependencies(["deltalake"], extras="delta-table")
-    def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
-        from deltalake.writer import write_deltalake
-
-        from unstructured.ingest.utils.table import convert_to_pandas_dataframe
-
-        df = convert_to_pandas_dataframe(
-            elements_dict=elements_dict,
-            drop_empty_cols=self.write_config.drop_empty_cols,
-        )
-        logger.info(
-            f"writing {len(df)} rows to destination table "
-            f"at {self.connector_config.table_uri}\ndtypes: {df.dtypes}",
-        )
-        writer_kwargs = {
-            "table_or_uri": self.connector_config.table_uri,
-            "data": df,
-            "mode": self.write_config.mode,
-            "engine": self.write_config.engine,
-        }
-        if self.write_config.schema_mode is not None:
-            writer_kwargs["schema_mode"] = self.write_config.schema_mode
-        # NOTE: the deltalake writer on Linux can sometimes finish but still trigger a SIGABRT
-        # and cause ingest to fail, even though all tasks completed normally. Putting the writer
-        # into a separate process mitigates this issue by ensuring the Python interpreter waits
-        # properly for deltalake's rust backend to finish.
-        writer = Process(
-            target=write_deltalake,
-            kwargs=writer_kwargs,
-        )
-        writer.start()
-        writer.join()
diff --git a/unstructured/ingest/connector/discord.py b/unstructured/ingest/connector/discord.py
deleted file mode 100644
index bfbfc8fbd..000000000
--- a/unstructured/ingest/connector/discord.py
+++ /dev/null
@@ -1,180 +0,0 @@
-import datetime as dt
-import typing as t
-from dataclasses import dataclass
-from pathlib import Path
-
-from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError
-from unstructured.ingest.interfaces import (
-    AccessConfig,
-    BaseConnectorConfig,
-    BaseSingleIngestDoc,
-    BaseSourceConnector,
-    IngestDocCleanupMixin,
-    SourceConnectorCleanupMixin,
-    SourceMetadata,
-)
-from unstructured.ingest.logger import logger
-from unstructured.utils import (
-    requires_dependencies,
-)
-
-
-@dataclass
-class DiscordAccessConfig(AccessConfig):
-    token: str = enhanced_field(sensitive=True)
-
-
-@dataclass
-class SimpleDiscordConfig(BaseConnectorConfig):
-    """Connector config where channels is a comma-separated list of
-    Discord channels to pull messages from.
-    """
-
-    # Discord Specific Options
-    access_config: DiscordAccessConfig
-    channels: t.List[str]
-    period: t.Optional[int] = None
-
-
-@dataclass
-class DiscordIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
-    """Class encapsulating fetching a doc and writing processed results (but not
-    doing the processing!).
-    Also includes a cleanup method. When things go wrong and the cleanup
-    method is not called, the file is left behind on the filesystem to assist debugging.
- """ - - connector_config: SimpleDiscordConfig - channel: str - days: t.Optional[int] = None - registry_name: str = "discord" - - # NOTE(crag): probably doesn't matter, but intentionally not defining tmp_download_file - # __post_init__ for multiprocessing simplicity (no Path objects in initially - # instantiated object) - def _tmp_download_file(self): - channel_file = self.channel + ".txt" - return Path(self.read_config.download_dir) / channel_file - - @property - def _output_filename(self): - output_file = self.channel + ".json" - return Path(self.processor_config.output_dir) / output_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionNetworkError.wrap - @requires_dependencies(dependencies=["discord"], extras="discord") - def _get_messages(self): - """Actually fetches the data from discord.""" - import discord - from discord.ext import commands - - messages: t.List[discord.Message] = [] - jumpurl: t.List[str] = [] - intents = discord.Intents.default() - intents.message_content = True - bot = commands.Bot(command_prefix=">", intents=intents) - - @bot.event - async def on_ready(): - try: - after_date = None - if self.days: - after_date = dt.datetime.utcnow() - dt.timedelta(days=self.days) - channel = bot.get_channel(int(self.channel)) - jumpurl.append(channel.jump_url) # type: ignore - async for msg in channel.history(after=after_date): # type: ignore - messages.append(msg) - await bot.close() - except Exception: - logger.error("Error fetching messages") - await bot.close() - raise - - bot.run(self.connector_config.access_config.token) - jump_url = None if len(jumpurl) < 1 else jumpurl[0] - return messages, jump_url - - def update_source_metadata(self, **kwargs): - messages, jump_url = kwargs.get("messages_tuple", self._get_messages()) - if messages == []: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - dates = [m.created_at for m in messages if m.created_at] - dates.sort() - self.source_metadata = SourceMetadata( - date_created=dates[0].isoformat(), - date_modified=dates[-1].isoformat(), - source_url=jump_url, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - self._create_full_tmp_dir_path() - - messages, jump_url = self._get_messages() - self.update_source_metadata(messages_tuple=(messages, jump_url)) - if messages == []: - raise ValueError(f"Failed to retrieve messages from Discord channel {self.channel}") - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - with open(self._tmp_download_file(), "w") as f: - for m in messages: - f.write(m.content + "\n") - - @property - def filename(self): - """The filename of the file created from a discord channel""" - return self._tmp_download_file() - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "channel": self.channel, - } - - -class DiscordSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleDiscordConfig - - def initialize(self): - pass - - @requires_dependencies(dependencies=["discord"], extras="discord") - def check_connection(self): - import asyncio - - import discord - from discord.client import Client - - intents = discord.Intents.default() - try: - client = Client(intents=intents) - 
asyncio.run(client.start(token=self.connector_config.access_config.token)) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - return [ - DiscordIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - channel=channel, - days=self.connector_config.period, - ) - for channel in self.connector_config.channels - ] diff --git a/unstructured/ingest/connector/elasticsearch.py b/unstructured/ingest/connector/elasticsearch.py deleted file mode 100644 index aa8ff1d9e..000000000 --- a/unstructured/ingest/connector/elasticsearch.py +++ /dev/null @@ -1,397 +0,0 @@ -import copy -import hashlib -import typing as t -import uuid -from dataclasses import dataclass, field -from pathlib import Path - -from dataclasses_json.core import Json - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDocBatch, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import generator_batching_wbytes -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from elasticsearch import Elasticsearch - - -@dataclass -class ElasticsearchAccessConfig(AccessConfig): - hosts: t.Optional[t.List[str]] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - cloud_id: t.Optional[str] = None - api_key: t.Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="es_api_key" - ) - api_key_id: t.Optional[str] = None - bearer_auth: t.Optional[str] = enhanced_field(default=None, sensitive=True) - ca_certs: t.Optional[str] = None - ssl_assert_fingerprint: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - def to_dict(self, **kwargs) -> t.Dict[str, Json]: - d = super().to_dict(**kwargs) - # Update auth related fields to conform to what the SDK expects based on the - # supported methods: - # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html - if not self.ca_certs: - # ES library already sets a default for this, don't want to - # introduce data by setting it to None - d.pop("ca_certs") - if self.password and (self.cloud_id or self.ca_certs or self.ssl_assert_fingerprint): - d.pop("password") - d["basic_auth"] = ("elastic", self.password) - elif not self.cloud_id and self.username and self.password: - d.pop("username", None) - d.pop("password", None) - d["basic_auth"] = (self.username, self.password) - elif self.api_key and self.api_key_id: - d.pop("api_key_id", None) - d.pop("api_key", None) - d["api_key"] = (self.api_key_id, self.api_key) - # This doesn't exist on the client init, remove: - d.pop("api_key_id", None) - return d - - -@dataclass -class SimpleElasticsearchConfig(BaseConnectorConfig): - """Connector config where: - url is the url to access the elasticsearch server, - index_name is the name of the index to reach to, - """ - - index_name: str - batch_size: 
int = 100 - fields: t.List[str] = field(default_factory=list) - access_config: ElasticsearchAccessConfig = None - - -@dataclass -class ElasticsearchDocumentMeta: - """Metadata specifying: - name of the elasticsearch index that is being reached to, - and the id of document that is being reached to, - """ - - index_name: str - document_id: str - - -@dataclass -class ElasticsearchIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Current implementation creates a python Elasticsearch client to fetch each doc, - rather than creating a client for each thread. - """ - - connector_config: SimpleElasticsearchConfig - document_meta: ElasticsearchDocumentMeta - document: dict = field(default_factory=dict) - registry_name: str = "elasticsearch" - - # TODO: remove one of filename or _tmp_download_file, using a wrapper - @property - def filename(self): - f = self.document_meta.document_id - if self.connector_config.fields: - f = "{}-{}".format( - f, - hashlib.sha256(",".join(self.connector_config.fields).encode()).hexdigest()[:8], - ) - return ( - Path(self.read_config.download_dir) / self.document_meta.index_name / f"{f}.txt" - ).resolve() - - @property - def _output_filename(self): - """Create filename document id combined with a hash of the query to uniquely identify - the output file.""" - # Generate SHA256 hash and take the first 8 characters - filename = self.document_meta.document_id - if self.connector_config.fields: - filename = "{}-{}".format( - filename, - hashlib.sha256(",".join(self.connector_config.fields).encode()).hexdigest()[:8], - ) - output_file = f"{filename}.json" - return ( - Path(self.processor_config.output_dir) / self.connector_config.index_name / output_file - ) - - def update_source_metadata(self, **kwargs): - if self.document is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - version=self.document["_version"], - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - pass - - @property - def date_created(self) -> t.Optional[str]: - return None - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "hosts": self.connector_config.access_config.hosts, - "index_name": self.connector_config.index_name, - "document_id": self.document_meta.document_id, - } - - -@dataclass -class ElasticsearchIngestDocBatch(BaseIngestDocBatch): - connector_config: SimpleElasticsearchConfig - ingest_docs: t.List[ElasticsearchIngestDoc] = field(default_factory=list) - list_of_ids: t.List[str] = field(default_factory=list) - registry_name: str = "elasticsearch_batch" - - def __post_init__(self): - # Until python3.8 is deprecated, this is a limitation of dataclass inheritance - # to make it a required field - if len(self.list_of_ids) == 0: - raise ValueError("list_of_ids is required") - - @property - def unique_id(self) -> str: - return ",".join(sorted(self.list_of_ids)) - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def _get_docs(self): - from elasticsearch import Elasticsearch - from elasticsearch.helpers import scan - - es = 
Elasticsearch(**self.connector_config.access_config.to_dict(apply_name_overload=False)) - scan_query = { - "_source": self.connector_config.fields, - "version": True, - "query": {"ids": {"values": self.list_of_ids}}, - } - - result = scan( - es, - query=scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - return list(result) - - @SourceConnectionError.wrap - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def get_files(self): - documents = self._get_docs() - for doc in documents: - ingest_doc = ElasticsearchIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - document=doc, - document_meta=ElasticsearchDocumentMeta( - self.connector_config.index_name, doc["_id"] - ), - ) - ingest_doc.update_source_metadata() - doc_body = doc["_source"] - filename = ingest_doc.filename - flattened_dict = flatten_dict(dictionary=doc_body) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w", encoding="utf8") as f: - f.write(concatenated_values) - self.ingest_docs.append(ingest_doc) - - -@dataclass -class ElasticsearchSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches particular fields from all documents in a given elasticsearch cluster and index""" - - connector_config: SimpleElasticsearchConfig - _es: t.Optional["Elasticsearch"] = field(init=False, default=None) - - @property - def es(self): - from elasticsearch import Elasticsearch - - if self._es is None: - self._es = Elasticsearch( - **self.connector_config.access_config.to_dict(apply_name_overload=False) - ) - return self._es - - def check_connection(self): - try: - self.es.perform_request("HEAD", "/", headers={"accept": "application/json"}) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def __post_init__(self): - self.scan_query: dict = {"stored_fields": [], "query": {"match_all": {}}} - - def initialize(self): - pass - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def _get_doc_ids(self): - """Fetches all document ids in an index""" - from elasticsearch.helpers import scan - - hits = scan( - self.es, - query=self.scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - - return [hit["_id"] for hit in hits] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with _get_doc_ids""" - ids = self._get_doc_ids() - id_batches = [ - ids[ - i - * self.connector_config.batch_size : (i + 1) # noqa - * self.connector_config.batch_size - ] - for i in range( - (len(ids) + self.connector_config.batch_size - 1) - // self.connector_config.batch_size - ) - ] - return [ - ElasticsearchIngestDocBatch( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - list_of_ids=batched_ids, - ) - for batched_ids in id_batches - ] - - -@dataclass -class ElasticsearchWriteConfig(WriteConfig): - batch_size_bytes: int = 15_000_000 - num_processes: int = 1 - - -@dataclass -class ElasticsearchDestinationConnector(BaseDestinationConnector): - write_config: ElasticsearchWriteConfig - connector_config: SimpleElasticsearchConfig - _client: t.Optional["Elasticsearch"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The 
_client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @DestinationConnectionError.wrap - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def generate_client(self) -> "Elasticsearch": - from elasticsearch import Elasticsearch - - return Elasticsearch( - **self.connector_config.access_config.to_dict(apply_name_overload=False) - ) - - @property - def client(self): - if self._client is None: - self._client = self.generate_client() - return self._client - - def initialize(self): - _ = self.client - - @DestinationConnectionError.wrap - def check_connection(self): - try: - assert self.client.ping() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing document batches to destination" - f" index named {self.connector_config.index_name}" - f" at {self.connector_config.access_config.hosts}" - f" with batch size (in bytes) {self.write_config.batch_size_bytes}" - f" with {self.write_config.num_processes} (number of) processes" - ) - from elasticsearch.helpers import parallel_bulk - - for batch in generator_batching_wbytes( - elements_dict, batch_size_limit_bytes=self.write_config.batch_size_bytes - ): - for success, info in parallel_bulk( - self.client, batch, thread_count=self.write_config.num_processes - ): - if not success: - logger.error( - "upload failed for a batch in elasticsearch destination connector:", info - ) - - def normalize_dict(self, element_dict: dict) -> dict: - return { - "_index": self.connector_config.index_name, - "_id": str(uuid.uuid4()), - "_source": { - "element_id": element_dict.pop("element_id", None), - "embeddings": element_dict.pop("embeddings", None), - "text": element_dict.pop("text", None), - "type": element_dict.pop("type", None), - "metadata": flatten_dict( - element_dict.pop("metadata", None), - separator="-", - ), - }, - } diff --git a/unstructured/ingest/connector/fsspec/__init__.py b/unstructured/ingest/connector/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/fsspec/azure.py b/unstructured/ingest/connector/fsspec/azure.py deleted file mode 100644 index 169cda6a0..000000000 --- a/unstructured/ingest/connector/fsspec/azure.py +++ /dev/null @@ -1,78 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, - WriteTextConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class AzureWriteTextConfig(WriteTextConfig): - overwrite: bool = False - - -@dataclass -class AzureWriteConfig(FsspecWriteConfig): - write_text_config: 
t.Optional[AzureWriteTextConfig] = None - - -@dataclass -class AzureAccessConfig(AccessConfig): - account_name: t.Optional[str] = enhanced_field(default=None, sensitive=True) - account_key: t.Optional[str] = enhanced_field(default=None, sensitive=True) - connection_string: t.Optional[str] = enhanced_field(default=None, sensitive=True) - sas_token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - -@dataclass -class SimpleAzureBlobStorageConfig(SimpleFsspecConfig): - access_config: AzureAccessConfig = None - - -@dataclass -class AzureBlobStorageIngestDoc(FsspecIngestDoc): - connector_config: SimpleAzureBlobStorageConfig - registry_name: str = "azure" - - @SourceConnectionError.wrap - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def get_file(self): - super().get_file() - - -@dataclass -class AzureBlobStorageSourceConnector(FsspecSourceConnector): - connector_config: SimpleAzureBlobStorageConfig - - def __post_init__(self): - self.ingest_doc_cls: t.Type[AzureBlobStorageIngestDoc] = AzureBlobStorageIngestDoc - - -@dataclass -class AzureBlobStorageDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleAzureBlobStorageConfig - write_config: AzureWriteConfig - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def initialize(self): - super().initialize() - - @requires_dependencies(["adlfs"], extras="azure") - def check_connection(self): - from adlfs import AzureBlobFileSystem - - try: - AzureBlobFileSystem(**self.connector_config.get_access_config()) - except ValueError as connection_error: - logger.error(f"failed to validate connection: {connection_error}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {connection_error}") diff --git a/unstructured/ingest/connector/fsspec/box.py b/unstructured/ingest/connector/fsspec/box.py deleted file mode 100644 index 67a56fa69..000000000 --- a/unstructured/ingest/connector/fsspec/box.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -Box Connector -Box does not make it simple to download files with an App. -First of all, this does not work with a free Box account. -Make sure the App service email is a collaborator for your folder (co-owner or editor) -Make sure you have the 'write all files' application scope -Maybe check 'Make api calls as the as-user header' -REAUTHORIZE app after making any of the above changes -""" - -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, -) -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -class AccessTokenError(Exception): - """There is a problem with the Access Token.""" - - -@dataclass -class BoxWriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class BoxAccessConfig(AccessConfig): - box_app_config: t.Optional[str] = None - - -@dataclass -class SimpleBoxConfig(SimpleFsspecConfig): - access_config: BoxAccessConfig = None - - @requires_dependencies(["boxfs"], extras="box") - def get_access_config(self) -> dict: - # Return access_kwargs with oauth. The oauth object can not be stored directly in the config - # because it is not serializable. 
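-        # A JWTAuth object is therefore rebuilt from the settings file on every
-        # call; the config itself only persists the box_app_config file path.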
- from boxsdk import JWTAuth
-
- access_kwargs_with_oauth: dict[str, t.Any] = {
- "oauth": JWTAuth.from_settings_file(
- self.access_config.box_app_config,
- ),
- }
- access_config: dict[str, t.Any] = self.access_config.to_dict()
- access_config.pop("box_app_config", None)
- access_kwargs_with_oauth.update(access_config)
-
- return access_kwargs_with_oauth
-
-
-@dataclass
-class BoxIngestDoc(FsspecIngestDoc):
- connector_config: SimpleBoxConfig
- registry_name: str = "box"
-
- @SourceConnectionError.wrap
- @requires_dependencies(["boxfs", "fsspec"], extras="box")
- def get_file(self):
- super().get_file()
-
-
-@dataclass
-class BoxSourceConnector(FsspecSourceConnector):
- connector_config: SimpleBoxConfig
-
- @requires_dependencies(["boxfs"], extras="box")
- def check_connection(self):
- from boxfs import BoxFileSystem
-
- try:
- BoxFileSystem(**self.connector_config.get_access_config())
- except Exception as e:
- logger.error(f"failed to validate connection: {e}", exc_info=True)
- raise SourceConnectionError(f"failed to validate connection: {e}")
-
- def __post_init__(self):
- self.ingest_doc_cls: t.Type[BoxIngestDoc] = BoxIngestDoc
-
-
-@dataclass
-class BoxDestinationConnector(FsspecDestinationConnector):
- connector_config: SimpleBoxConfig
- write_config: BoxWriteConfig
-
- @requires_dependencies(["boxfs", "fsspec"], extras="box")
- def initialize(self):
- super().initialize()
-
- @requires_dependencies(["boxfs"], extras="box")
- def check_connection(self):
- from boxfs import BoxFileSystem
-
- try:
- BoxFileSystem(**self.connector_config.get_access_config())
- except Exception as e:
- logger.error(f"failed to validate connection: {e}", exc_info=True)
- raise DestinationConnectionError(f"failed to validate connection: {e}")
diff --git a/unstructured/ingest/connector/fsspec/dropbox.py b/unstructured/ingest/connector/fsspec/dropbox.py
deleted file mode 100644
index 23647bb6d..000000000
--- a/unstructured/ingest/connector/fsspec/dropbox.py
+++ /dev/null
@@ -1,160 +0,0 @@
-"""
-Dropbox Connector
-The Dropbox Connector presents a couple of unusual situations.
-1) Dropbox does not issue unexpiring tokens.
-2) It requires a forward slash `/` in front of the remote_file_path. This presents
-some real problems creating paths: appending a path that begins with a forward
-slash to any other path, whether using the / shorthand or joinpath, causes the
-starting path to disappear, so the `/` needs to be stripped off.
-3) To list and get files from the Dropbox root directory you need `""`, `" "`, or `" /"`.
-"""
-
-import re
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Type
-
-from unstructured.ingest.connector.fsspec.fsspec import (
- FsspecDestinationConnector,
- FsspecIngestDoc,
- FsspecSourceConnector,
- FsspecWriteConfig,
- SimpleFsspecConfig,
-)
-from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.error import SourceConnectionError
-from unstructured.ingest.interfaces import AccessConfig
-from unstructured.ingest.logger import logger
-from unstructured.utils import requires_dependencies
-
-
-class MissingFolderError(Exception):
- """There is no folder by that name. For root try `dropbox:// /`"""
-
-
-@dataclass
-class DropboxAccessConfig(AccessConfig):
- token: str = enhanced_field(sensitive=True)
-
-
-@dataclass
-class DropboxWriteConfig(FsspecWriteConfig):
- pass
-
-
-@dataclass
-class SimpleDropboxConfig(SimpleFsspecConfig):
- access_config: DropboxAccessConfig = None
-
-
-@dataclass
-class DropboxIngestDoc(FsspecIngestDoc):
- connector_config: SimpleDropboxConfig
- registry_name: str = "dropbox"
-
- @SourceConnectionError.wrap
- @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
- def get_file(self):
- super().get_file()
-
- @property
- def _output_filename(self):
- # Dropbox requires a forward slash at the front of the folder path. This
- # creates some complications in path joining, so a custom path is created here.
- # Dropbox uses an empty string `""`, a space `" "`, or `" /"` to list the root.
- if self.connector_config.dir_path == " ":
- return Path(self.processor_config.output_dir) / re.sub(
- "^/",
- "",
- f"{self.remote_file_path}.json",
- )
- else:
- return (
- Path(self.processor_config.output_dir)
- / f"{self.remote_file_path.replace(f'/{self.connector_config.dir_path}/', '')}.json"
- )
-
- def _tmp_download_file(self):
- # Dropbox requires a forward slash at the front of the folder path. This
- # creates some complications in path joining, so a custom path is created here.
- # Dropbox uses an empty string `""`, a space `" "`, or `" /"` to list the root.
- download_dir: str = self.read_config.download_dir if self.read_config.download_dir else ""
- if not download_dir:
- return ""
- if self.connector_config.dir_path == " ":
- return Path(download_dir) / re.sub(
- "^/",
- "",
- self.remote_file_path,
- )
- else:
- return Path(download_dir) / self.remote_file_path.replace(
- f"/{self.connector_config.dir_path}/",
- "",
- )
-
-
-@dataclass
-class DropboxSourceConnector(FsspecSourceConnector):
- connector_config: SimpleDropboxConfig
-
- def __post_init__(self):
- self.ingest_doc_cls: Type[DropboxIngestDoc] = DropboxIngestDoc
-
- @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
- def initialize(self):
- from fsspec import AbstractFileSystem, get_filesystem_class
-
- try:
- self.fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)(
- **self.connector_config.get_access_config(),
- )
- # Dropbox requires a forward slash at the front of the folder path. This
- # creates some complications in path joining, so a custom path is created here.
- ls_output = self.fs.ls(f"/{self.connector_config.path_without_protocol}")
- except Exception as e:
- logger.error(f"failed to validate connection: {e}", exc_info=True)
- raise SourceConnectionError(f"failed to validate connection: {e}")
- if ls_output and len(ls_output) >= 1:
- return
- elif ls_output:
- raise ValueError(
- f"No objects found in {self.connector_config.remote_url}.",
- )
- else:
- raise MissingFolderError(
- "There is no folder by that name. For root try `dropbox:// /`",
- )
-
- def _list_files(self):
- # Dropbox requires a forward slash at the front of the folder path. This
- # creates some complications in path joining, so a custom path is created here.
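
The path-joining quirk these comments keep referring to is easy to reproduce with pathlib alone: joining a segment that starts with `/` discards everything before it, which is why the connector strips the leading slash with `re.sub("^/", ...)`. A small illustration (paths here are made up):

```python
import re
from pathlib import Path

# An absolute right-hand segment replaces the left-hand path entirely.
assert Path("/tmp/downloads") / "/shared/file.pdf" == Path("/shared/file.pdf")

# Stripping the leading slash first keeps the download prefix intact.
remote_file_path = "/shared/file.pdf"
joined = Path("/tmp/downloads") / re.sub("^/", "", remote_file_path)
assert joined == Path("/tmp/downloads/shared/file.pdf")
```
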
- if not self.connector_config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems because they are seen - # as 0byte files - return [ - x.get("name") - for x in self.fs.ls( - f"/{self.connector_config.path_without_protocol}", - detail=True, - ) - if x.get("size") - ] - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - return [ - k - for k, v in self.fs.find( - f"/{self.connector_config.path_without_protocol}", - detail=True, - ).items() - if v.get("size") - ] - - -@dataclass -class DropboxDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleFsspecConfig - write_config: DropboxWriteConfig diff --git a/unstructured/ingest/connector/fsspec/fsspec.py b/unstructured/ingest/connector/fsspec/fsspec.py deleted file mode 100644 index 1b60a1d87..000000000 --- a/unstructured/ingest/connector/fsspec/fsspec.py +++ /dev/null @@ -1,359 +0,0 @@ -import fnmatch -import json -import os -import typing as t -from abc import ABC -from contextlib import suppress -from dataclasses import dataclass -from pathlib import Path, PurePath - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.error import ( - DestinationConnectionError, - SourceConnectionError, - SourceConnectionNetworkError, -) -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseDestinationConnector, - BaseSingleIngestDoc, - BaseSourceConnector, - FsspecConfig, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.compression import ( - TAR_FILE_EXT, - ZIP_FILE_EXT, - CompressionSourceConnectorMixin, -) -from unstructured.utils import ( - requires_dependencies, -) - -SUPPORTED_REMOTE_FSSPEC_PROTOCOLS = [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", - "sftp", -] - - -@dataclass -class SimpleFsspecConfig(FsspecConfig, BaseConnectorConfig): - pass - - -@dataclass -class FsspecIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - connector_config: SimpleFsspecConfig - remote_file_path: str - - def _tmp_download_file(self): - download_dir = self.read_config.download_dir if self.read_config.download_dir else "" - return Path(download_dir) / self.remote_file_path.replace( - f"{self.connector_config.dir_path}/", - "", - ) - - @property - def _output_filename(self): - # Dynamically parse filename , can change if remote path was pointing to the single - # file, a directory, or nested directory - if self.remote_file_path == self.connector_config.path_without_protocol: - file = self.remote_file_path.split("/")[-1] - filename = f"{file}.json" - else: - path_without_protocol = ( - self.connector_config.path_without_protocol - if self.connector_config.path_without_protocol.endswith("/") - else f"{self.connector_config.path_without_protocol}/" - ) - filename = f"{self.remote_file_path.replace(path_without_protocol, '')}.json" - return Path(self.processor_config.output_dir) / filename - - def _create_full_tmp_dir_path(self): - """Includes "directories" in the object path""" - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the file from the current filesystem and stores it locally.""" - from fsspec import AbstractFileSystem, get_filesystem_class - - self._create_full_tmp_dir_path() - fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - self._get_file(fs=fs) - fs.get(rpath=self.remote_file_path, lpath=self._tmp_download_file().as_posix()) - self.update_source_metadata() - - @SourceConnectionNetworkError.wrap - def _get_file(self, fs): - fs.get(rpath=self.remote_file_path, lpath=self._tmp_download_file().as_posix()) - - @requires_dependencies(["fsspec"]) - def update_source_metadata(self): - from fsspec import AbstractFileSystem, get_filesystem_class - - fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)( - **self.connector_config.get_access_config(), - ) - - date_created = None - with suppress(NotImplementedError): - date_created = fs.created(self.remote_file_path).isoformat() - - date_modified = None - with suppress(NotImplementedError): - date_modified = fs.modified(self.remote_file_path).isoformat() - - version = ( - fs.checksum(self.remote_file_path) - if self.connector_config.protocol != "gs" - else fs.info(self.remote_file_path).get("etag", "") - ) - file_exists = fs.exists(self.remote_file_path) - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - version=str(version), - source_url=f"{self.connector_config.protocol}://{self.remote_file_path}", - exists=file_exists, - ) - - @property - def filename(self): - """The filename of the file after downloading from cloud""" - return self._tmp_download_file() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - """Returns the equivalent of ls in dict""" - return { - "protocol": self.connector_config.protocol, - "remote_file_path": self.remote_file_path, - } - - -@dataclass -class FsspecSourceConnector( - SourceConnectorCleanupMixin, - CompressionSourceConnectorMixin, - BaseSourceConnector, -): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleFsspecConfig - - def check_connection(self): - from fsspec import get_filesystem_class - - try: - fs = get_filesystem_class(self.connector_config.protocol)( - 
**self.connector_config.get_access_config(),
- )
- fs.ls(path=self.connector_config.path_without_protocol, detail=False)
- except Exception as e:
- logger.error(f"failed to validate connection: {e}", exc_info=True)
- raise SourceConnectionError(f"failed to validate connection: {e}")
-
- def __post_init__(self):
- self.ingest_doc_cls: t.Type[FsspecIngestDoc] = FsspecIngestDoc
-
- def initialize(self):
- from fsspec import AbstractFileSystem, get_filesystem_class
-
- self.fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)(
- **self.connector_config.get_access_config(),
- )
-
- # Verify that we can get metadata for an object; this validates the connection info.
- ls_output = self.fs.ls(self.connector_config.path_without_protocol, detail=False)
- if len(ls_output) < 1:
- raise ValueError(
- f"No objects found in {self.connector_config.remote_url}.",
- )
-
- def _list_files(self):
- if not self.connector_config.recursive:
- # fs.ls does not walk directories
- # directories that are listed in cloud storage can cause problems
- # because they are seen as 0-byte files
- return [
- x.get("name")
- for x in self.fs.ls(self.connector_config.path_without_protocol, detail=True)
- if x.get("size") > 0
- ]
- else:
- # fs.find will recursively walk directories
- # "size" is a common key for all the cloud protocols with fs
- return [
- k
- for k, v in self.fs.find(
- self.connector_config.path_without_protocol,
- detail=True,
- ).items()
- if v.get("size") > 0
- ]
-
- def does_path_match_glob(self, path: str) -> bool:
- if self.connector_config.file_glob is None:
- return True
- patterns = self.connector_config.file_glob
- for pattern in patterns:
- if fnmatch.filter([path], pattern):
- return True
- logger.debug(f"The file {path!r} is discarded as it does not match any given glob.")
- return False
-
- def get_ingest_docs(self):
- raw_files = self._list_files()
- # If glob filters are provided, use them to filter on filepaths
- files = [f for f in raw_files if self.does_path_match_glob(f)]
- # remove compressed files
- compressed_file_ext = TAR_FILE_EXT + ZIP_FILE_EXT
- compressed_files = []
- uncompressed_files = []
- docs: t.List[BaseSingleIngestDoc] = []
- for file in files:
- if any(file.endswith(ext) for ext in compressed_file_ext):
- compressed_files.append(file)
- else:
- uncompressed_files.append(file)
- docs.extend(
- [
- self.ingest_doc_cls(
- read_config=self.read_config,
- connector_config=self.connector_config,
- processor_config=self.processor_config,
- remote_file_path=file,
- )
- for file in uncompressed_files
- ],
- )
- if not self.connector_config.uncompress:
- return docs
- for compressed_file in compressed_files:
- compressed_doc = self.ingest_doc_cls(
- read_config=self.read_config,
- processor_config=self.processor_config,
- connector_config=self.connector_config,
- remote_file_path=compressed_file,
- )
- try:
- local_ingest_docs = self.process_compressed_doc(doc=compressed_doc)
- logger.info(f"adding {len(local_ingest_docs)} from {compressed_file}")
- docs.extend(local_ingest_docs)
- finally:
- compressed_doc.cleanup_file()
- return docs
-
-
-@dataclass
-class WriteTextConfig(EnhancedDataClassJsonMixin, ABC):
- pass
-
-
-@dataclass
-class FsspecWriteConfig(WriteConfig):
- write_text_config: t.Optional[WriteTextConfig] = None
-
- def get_write_text_config(self) -> t.Dict[str, t.Any]:
- if write_text_kwargs := self.write_text_config:
- return write_text_kwargs.to_dict()
- return {}
-
-
-@dataclass
-class FsspecDestinationConnector(BaseDestinationConnector):
connector_config: SimpleFsspecConfig
- write_config: FsspecWriteConfig
-
- def initialize(self):
- from fsspec import AbstractFileSystem, get_filesystem_class
-
- self.fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)(
- **self.connector_config.get_access_config(),
- )
- self.check_connection()
-
- def check_connection(self):
- from fsspec import AbstractFileSystem, get_filesystem_class
-
- try:
- fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)(
- **self.connector_config.get_access_config(),
- )
-
- # e.g. Dropbox path starts with /
- bucket_name = "/" if self.connector_config.path_without_protocol.startswith("/") else ""
- bucket_name += self.connector_config.dir_path.split("/")[0]
-
- logger.info(f"checking connection for destination {bucket_name}")
- fs.ls(path=bucket_name, detail=False)
- except Exception as e:
- logger.error(f"failed to validate connection: {e}", exc_info=True)
- raise DestinationConnectionError(f"failed to validate connection: {e}")
-
- def write_dict(
- self,
- *args,
- elements_dict: t.List[t.Dict[str, t.Any]],
- filename: t.Optional[str] = None,
- indent: int = 4,
- encoding: str = "utf-8",
- **kwargs,
- ) -> None:
- from fsspec import AbstractFileSystem, get_filesystem_class
-
- fs: AbstractFileSystem = get_filesystem_class(self.connector_config.protocol)(
- **self.connector_config.get_access_config(),
- )
-
- logger.info(f"Writing content using filesystem: {type(fs).__name__}")
-
- output_folder = self.connector_config.path_without_protocol
- output_folder = os.path.join(output_folder, "")  # Make sure folder ends with file separator
- filename = (
- filename.strip(os.sep) if filename else filename
- )  # Make sure filename doesn't begin with file separator
- output_path = str(PurePath(output_folder, filename)) if filename else output_folder
- full_output_path = f"{self.connector_config.protocol}://{output_path}"
- logger.debug(f"uploading content to {full_output_path}")
- write_text_configs = self.write_config.get_write_text_config() if self.write_config else {}
- fs.write_text(
- full_output_path,
- json.dumps(elements_dict, indent=indent),
- encoding=encoding,
- **write_text_configs,
- )
-
- def get_elements_dict(self, docs: t.List[BaseSingleIngestDoc]) -> t.List[t.Dict[str, t.Any]]:
- pass
-
- def write(self, docs: t.List[BaseSingleIngestDoc]) -> None:
- for doc in docs:
- file_path = doc.base_output_filename
- filename = file_path if file_path else None
- with open(doc._output_filename) as json_file:
- logger.debug(f"uploading content from {doc._output_filename}")
- json_list = json.load(json_file)
- self.write_dict(elements_dict=json_list, filename=filename)
diff --git a/unstructured/ingest/connector/fsspec/gcs.py b/unstructured/ingest/connector/fsspec/gcs.py
deleted file mode 100644
index db5b0de44..000000000
--- a/unstructured/ingest/connector/fsspec/gcs.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import typing as t
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Type
-
-from unstructured.ingest.connector.fsspec.fsspec import (
- FsspecDestinationConnector,
- FsspecIngestDoc,
- FsspecSourceConnector,
- FsspecWriteConfig,
- SimpleFsspecConfig,
-)
-from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.error import SourceConnectionError
-from unstructured.ingest.interfaces import AccessConfig
-from unstructured.ingest.utils.string_and_date_utils import json_to_dict
-from unstructured.utils import requires_dependencies
-
-
-@dataclass
-class
GcsAccessConfig(AccessConfig): - token: t.Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="service_account_key" - ) - - def __post_init__(self): - ALLOWED_AUTH_VALUES = "google_default", "cache", "anon", "browser", "cloud" - - # Case: null value - if not self.token: - return - # Case: one of auth constants - if self.token in ALLOWED_AUTH_VALUES: - return - # Case: token as json - if isinstance(json_to_dict(self.token), dict): - self.token = json_to_dict(self.token) - return - # Case: path to token - if Path(self.token).is_file(): - return - - raise ValueError("Invalid auth token value") - - -@dataclass -class GcsWriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class SimpleGcsConfig(SimpleFsspecConfig): - access_config: GcsAccessConfig = None - - -@dataclass -class GcsIngestDoc(FsspecIngestDoc): - connector_config: SimpleGcsConfig - registry_name: str = "gcs" - - @SourceConnectionError.wrap - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def get_file(self): - super().get_file() - - -@dataclass -class GcsSourceConnector(FsspecSourceConnector): - connector_config: SimpleGcsConfig - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def initialize(self): - super().initialize() - - def __post_init__(self): - self.ingest_doc_cls: Type[GcsIngestDoc] = GcsIngestDoc - - -@dataclass -class GcsDestinationConnector(FsspecDestinationConnector): - connector_config: SimpleGcsConfig - write_config: GcsWriteConfig diff --git a/unstructured/ingest/connector/fsspec/s3.py b/unstructured/ingest/connector/fsspec/s3.py deleted file mode 100644 index 799276a27..000000000 --- a/unstructured/ingest/connector/fsspec/s3.py +++ /dev/null @@ -1,62 +0,0 @@ -import typing as t -from dataclasses import dataclass -from typing import Type - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecDestinationConnector, - FsspecIngestDoc, - FsspecSourceConnector, - FsspecWriteConfig, - SimpleFsspecConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.interfaces import AccessConfig -from unstructured.utils import requires_dependencies - - -@dataclass -class S3AccessConfig(AccessConfig): - anon: bool = enhanced_field(default=False, overload_name="anonymous") - endpoint_url: t.Optional[str] = None - key: t.Optional[str] = enhanced_field(default=None, sensitive=True) - secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - -@dataclass -class S3WriteConfig(FsspecWriteConfig): - pass - - -@dataclass -class SimpleS3Config(SimpleFsspecConfig): - access_config: S3AccessConfig = enhanced_field(default=None) - - -@dataclass -class S3IngestDoc(FsspecIngestDoc): - connector_config: SimpleS3Config - remote_file_path: str - registry_name: str = "s3" - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def get_file(self): - super().get_file() - - -@dataclass -class S3SourceConnector(FsspecSourceConnector): - connector_config: SimpleS3Config - - def __post_init__(self): - self.ingest_doc_cls: Type[S3IngestDoc] = S3IngestDoc - - -@dataclass -class S3DestinationConnector(FsspecDestinationConnector): - connector_config: SimpleS3Config - write_config: S3WriteConfig - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def initialize(self): - super().initialize() diff --git a/unstructured/ingest/connector/fsspec/sftp.py b/unstructured/ingest/connector/fsspec/sftp.py deleted file mode 100644 index f179fc233..000000000 --- 
a/unstructured/ingest/connector/fsspec/sftp.py +++ /dev/null @@ -1,81 +0,0 @@ -import os -from dataclasses import dataclass -from pathlib import Path -from typing import Type -from urllib.parse import urlparse - -from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecIngestDoc, - FsspecSourceConnector, - SimpleFsspecConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -@dataclass -class SftpAccessConfig(AccessConfig): - username: str - password: str = enhanced_field(sensitive=True) - host: str = "" - port: int = 22 - look_for_keys: bool = False - allow_agent: bool = False - - -@dataclass -class SimpleSftpConfig(SimpleFsspecConfig): - access_config: SftpAccessConfig = None - - def __post_init__(self): - super().__post_init__() - - _, ext = os.path.splitext(self.remote_url) - parsed_url = urlparse(self.remote_url) - if ext: - # We only want the file_path if it has an extension - self.file_path = Path(self.remote_url).name - self.dir_path = Path(parsed_url.path).parent.as_posix().lstrip("/") - self.path_without_protocol = self.dir_path - else: - self.file_path = "" - self.dir_path = parsed_url.path.lstrip("/") - self.path_without_protocol = self.dir_path - self.access_config.host = parsed_url.hostname or self.access_config.host - self.access_config.port = parsed_url.port or self.access_config.port - - -@dataclass -class SftpIngestDoc(FsspecIngestDoc): - connector_config: SimpleSftpConfig - registry_name: str = "sftp" - - @SourceConnectionError.wrap - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def get_file(self): - super().get_file() - - -@dataclass -class SftpSourceConnector(FsspecSourceConnector): - connector_config: SimpleSftpConfig - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def initialize(self): - super().initialize() - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def check_connection(self): - from fsspec.implementations.sftp import SFTPFileSystem - - try: - SFTPFileSystem(**self.connector_config.get_access_config()) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def __post_init__(self): - self.ingest_doc_cls: Type[SftpIngestDoc] = SftpIngestDoc diff --git a/unstructured/ingest/connector/git.py b/unstructured/ingest/connector/git.py deleted file mode 100644 index e03b6f4e7..000000000 --- a/unstructured/ingest/connector/git.py +++ /dev/null @@ -1,124 +0,0 @@ -import fnmatch -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, -) -from unstructured.ingest.logger import logger - - -@dataclass -class GitAccessConfig(AccessConfig): - access_token: t.Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="git_access_token" - ) - - -@dataclass -class SimpleGitConfig(BaseConnectorConfig): - url: str - access_config: GitAccessConfig - branch: t.Optional[str] = enhanced_field(default=None, 
overload_name="git_branch") - file_glob: t.Optional[t.List[str]] = enhanced_field(default=None, overload_name="git_file_glob") - repo_path: str = field(init=False, repr=False) - - -@dataclass -class GitIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleGitConfig = field(repr=False) - path: str - - @property - def filename(self): - return (Path(self.read_config.download_dir) / self.path).resolve() - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.path}.json" - - @property - def record_locator(self) -> t.Dict[str, t.Any]: - record_locator = { - "repo_path": self.connector_config.repo_path, - "file_path": self.path, - } - if self.connector_config.branch is not None: - record_locator["branch"] = self.connector_config.branch - return record_locator - - def _create_full_tmp_dir_path(self): - """includes directories in in the gitlab repository""" - self.filename.parent.mkdir(parents=True, exist_ok=True) - - def update_source_metadata(self, **kwargs): - raise NotImplementedError() - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - self._fetch_and_write() - - def _fetch_content(self) -> None: - raise NotImplementedError() - - def _fetch_and_write(self) -> None: - raise NotImplementedError() - - -@dataclass -class GitSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleGitConfig - - def initialize(self): - pass - - def check_connection(self): - pass - - @staticmethod - def is_file_type_supported(path: str) -> bool: - # Workaround to ensure that auto.partition isn't fed with .yaml, .py, etc. files - # TODO: What to do with no filenames? e.g. LICENSE, Makefile, etc. 
- supported = path.endswith(
- (
- ".md",
- ".txt",
- ".pdf",
- ".doc",
- ".docx",
- ".eml",
- ".heic",
- ".html",
- ".png",
- ".jpg",
- ".ppt",
- ".pptx",
- ".xml",
- ),
- )
- if not supported:
- logger.debug(
- f"The file {path!r} is discarded as it does not contain a supported filetype.",
- )
- return supported
-
- def does_path_match_glob(self, path: str) -> bool:
- if not self.connector_config.file_glob:
- return True
- patterns = self.connector_config.file_glob
- for pattern in patterns:
- if fnmatch.filter([path], pattern):
- return True
- logger.debug(f"The file {path!r} is discarded as it does not match any given glob.")
- return False
diff --git a/unstructured/ingest/connector/github.py b/unstructured/ingest/connector/github.py
deleted file mode 100644
index 2a63b8f32..000000000
--- a/unstructured/ingest/connector/github.py
+++ /dev/null
@@ -1,173 +0,0 @@
-import typing as t
-from dataclasses import dataclass
-from datetime import datetime
-from urllib.parse import urlparse
-
-import requests
-
-from unstructured.ingest.connector.git import (
- GitIngestDoc,
- GitSourceConnector,
- SimpleGitConfig,
-)
-from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError
-from unstructured.ingest.interfaces import SourceMetadata
-from unstructured.ingest.logger import logger
-from unstructured.utils import requires_dependencies
-
-if t.TYPE_CHECKING:
- from github.Repository import Repository
-
-
-@dataclass
-class SimpleGitHubConfig(SimpleGitConfig):
- def __post_init__(self):
- parsed_gh_url = urlparse(self.url)
- path_fragments = [fragment for fragment in parsed_gh_url.path.split("/") if fragment]
-
- # If a scheme and netloc are provided, ensure they are correct
- # Additionally, ensure that the path contains two fragments
- if (
- (parsed_gh_url.scheme and parsed_gh_url.scheme != "https")
- or (parsed_gh_url.netloc and parsed_gh_url.netloc != "github.com")
- or len(path_fragments) != 2
- ):
- raise ValueError(
- 'Please provide a valid URL, e.g. "https://github.com/Unstructured-IO/unstructured"'
- ' or a repository owner/name pair, e.g. "Unstructured-IO/unstructured".',
- )
-
- # If there are no issues, store the core repository info
- self.repo_path = parsed_gh_url.path
-
- @SourceConnectionError.wrap
- @requires_dependencies(["github"], extras="github")
- def get_repo(self) -> "Repository":
- from github import Github
-
- github = Github(self.access_config.access_token)
- return github.get_repo(self.repo_path)
-
-
-@dataclass
-class GitHubIngestDoc(GitIngestDoc):
- connector_config: SimpleGitHubConfig
- registry_name: str = "github"
-
- @property
- def date_created(self) -> t.Optional[str]:
- return None
-
- @requires_dependencies(["github"], extras="github")
- def _fetch_file(self):
- from github.GithubException import UnknownObjectException
-
- try:
- content_file = self.connector_config.get_repo().get_contents(self.path)
- except UnknownObjectException:
- logger.error(f"File doesn't exist: {self.connector_config.url}/{self.path}")
- return None
-
- return content_file
-
- @SourceConnectionNetworkError.wrap
- def _fetch_content(self, content_file):
- contents = b""
- if (
- not content_file.content # type: ignore
- and content_file.encoding == "none" # type: ignore
- and content_file.size # type: ignore
- ):
- logger.info("File too large for the GitHub API, using direct download link instead.")
- # NOTE: Maybe add a raise_for_status to catch connection timeout or HTTP Errors?
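
Regarding the NOTE that closes the hunk above: one possible shape for the suggested `raise_for_status()` variant, as a hedged sketch rather than the connector's actual behavior (`fetch_direct_download` is a hypothetical helper name):

```python
import logging
from typing import Optional

import requests

logger = logging.getLogger(__name__)


def fetch_direct_download(url: str, timeout: float = 30.0) -> Optional[bytes]:
    """Sketch of the raise_for_status() alternative suggested in the NOTE above."""
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turns 4xx/5xx responses into exceptions
    except requests.RequestException as exc:
        # Covers connection errors, timeouts, and HTTP status errors alike.
        logger.info("Direct download link has failed (%s)... Skipping this file.", exc)
        return None
    return response.content
```
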
- response = requests.get(content_file.download_url) # type: ignore - if response.status_code != 200: - logger.info("Direct download link has failed... Skipping this file.") - return None - else: - contents = response.content - else: - contents = content_file.decoded_content # type: ignore - return contents - - def update_source_metadata(self, **kwargs): - content_file = kwargs.get("content_file", self._fetch_file()) - if content_file is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - date_modified = datetime.strptime( - content_file.last_modified, - "%a, %d %b %Y %H:%M:%S %Z", - ).isoformat() - self.source_metadata = SourceMetadata( - date_modified=date_modified, - version=content_file.etag, - source_url=content_file.download_url, - exists=True, - ) - - def _fetch_and_write(self) -> None: - content_file = self._fetch_file() - self.update_source_metadata(content_file=content_file) - contents = self._fetch_content(content_file) - if contents is None: - raise ValueError( - f"Failed to retrieve file from repo " - f"{self.connector_config.url}/{self.path}. Check logs", - ) - with open(self.filename, "wb") as f: - f.write(contents) - - -@dataclass -class GitHubSourceConnector(GitSourceConnector): - connector_config: SimpleGitHubConfig - - @requires_dependencies(["github"], extras="github") - def check_connection(self): - from github import Consts - from github.GithubRetry import GithubRetry - from github.Requester import Requester - - try: - requester = Requester( - auth=self.connector_config.access_config.access_token, - base_url=Consts.DEFAULT_BASE_URL, - timeout=Consts.DEFAULT_TIMEOUT, - user_agent=Consts.DEFAULT_USER_AGENT, - per_page=Consts.DEFAULT_PER_PAGE, - verify=True, - retry=GithubRetry(), - pool_size=None, - ) - url_base = ( - "/repositories/" if isinstance(self.connector_config.repo_path, int) else "/repos/" - ) - url = f"{url_base}{self.connector_config.repo_path}" - headers, _ = requester.requestJsonAndCheck("HEAD", url) - logger.debug(f"headers from HEAD request: {headers}") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - repo = self.connector_config.get_repo() - # Load the Git tree with all files, and then create Ingest docs - # for all blobs, i.e. 
all files, ignoring directories
- sha = self.connector_config.branch or repo.default_branch
- git_tree = repo.get_git_tree(sha, recursive=True)
- return [
- GitHubIngestDoc(
- connector_config=self.connector_config,
- processor_config=self.processor_config,
- read_config=self.read_config,
- path=element.path,
- )
- for element in git_tree.tree
- if element.type == "blob"
- and self.is_file_type_supported(element.path)
- and (not self.connector_config.file_glob or self.does_path_match_glob(element.path))
- ]
diff --git a/unstructured/ingest/connector/gitlab.py b/unstructured/ingest/connector/gitlab.py
deleted file mode 100644
index 1d1e6c5f8..000000000
--- a/unstructured/ingest/connector/gitlab.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import typing as t
-from dataclasses import dataclass
-from urllib.parse import urlparse
-
-from unstructured.ingest.connector.git import (
- GitIngestDoc,
- GitSourceConnector,
- SimpleGitConfig,
-)
-from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError
-from unstructured.ingest.interfaces import SourceMetadata
-from unstructured.ingest.logger import logger
-from unstructured.utils import requires_dependencies
-
-if t.TYPE_CHECKING:
- from gitlab.v4.objects.projects import Project
-
-
-@dataclass
-class SimpleGitlabConfig(SimpleGitConfig):
- base_url: str = "https://gitlab.com"
-
- def __post_init__(self):
- parsed_gh_url = urlparse(self.url)
- # If a scheme or netloc are provided, use the parsed base url
- if parsed_gh_url.scheme or parsed_gh_url.netloc:
- self.base_url = f"{parsed_gh_url.scheme}://{parsed_gh_url.netloc}"
- self.repo_path = parsed_gh_url.path
- while self.repo_path.startswith("/"):
- self.repo_path = self.repo_path[1:]
-
- @SourceConnectionError.wrap
- @requires_dependencies(["gitlab"], extras="gitlab")
- def get_project(self) -> "Project":
- from gitlab import Gitlab
-
- gitlab = Gitlab(self.base_url, private_token=self.access_config.access_token)
- return gitlab.projects.get(self.repo_path)
-
-
-@dataclass
-class GitLabIngestDoc(GitIngestDoc):
- connector_config: SimpleGitlabConfig
- registry_name: str = "gitlab"
-
- @property
- def date_created(self) -> t.Optional[str]:
- return None
-
- @property
- def date_modified(self) -> t.Optional[str]:
- return None
-
- @property
- def source_url(self) -> t.Optional[str]:
- return None
-
- @SourceConnectionNetworkError.wrap
- @requires_dependencies(["gitlab"], extras="gitlab")
- def _fetch_content(self):
- from gitlab.exceptions import GitlabHttpError
-
- try:
- project = self.connector_config.get_project()
- content_file = project.files.get(
- self.path,
- ref=self.connector_config.branch or project.default_branch,
- )
- except GitlabHttpError as e:
- if e.response_code == 404:
- logger.error(f"File doesn't exist: {self.connector_config.url}/{self.path}")
- return None
- raise
- return content_file
-
- def update_source_metadata(self, **kwargs):
- content_file = kwargs.get("content_file", self._fetch_content())
- if content_file is None:
- self.source_metadata = SourceMetadata(
- exists=None,
- )
- return
- self.source_metadata = SourceMetadata(
- version=content_file.attributes.get("last_commit_id", ""),
- exists=True,
- )
-
- def _fetch_and_write(self) -> None:
- content_file = self._fetch_content()
- self.update_source_metadata(content_file=content_file)
- if content_file is None:
- raise ValueError(
- f"Failed to retrieve file from repo "
- f"{self.connector_config.url}/{self.path}.
Check logs.", - ) - contents = content_file.decode() - with open(self.filename, "wb") as f: - f.write(contents) - - -@dataclass -class GitLabSourceConnector(GitSourceConnector): - connector_config: SimpleGitlabConfig - - @requires_dependencies(["gitlab"], extras="gitlab") - def check_connection(self): - from gitlab import Gitlab - from gitlab.exceptions import GitlabError - - try: - gitlab = Gitlab( - self.connector_config.base_url, - private_token=self.connector_config.access_config.access_token, - ) - gitlab.auth() - except GitlabError as gitlab_error: - logger.error(f"failed to validate connection: {gitlab_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {gitlab_error}") - - def get_ingest_docs(self): - # Load the Git tree with all files, and then create Ingest docs - # for all blobs, i.e. all files, ignoring directories - project = self.connector_config.get_project() - ref = self.connector_config.branch or project.default_branch - git_tree = project.repository_tree( - ref=ref, - recursive=True, - iterator=True, - all=True, - ) - return [ - GitLabIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - path=element["path"], - ) - for element in git_tree - if element["type"] == "blob" - and self.is_file_type_supported(element["path"]) - and (not self.connector_config.file_glob or self.does_path_match_glob(element["path"])) - ] diff --git a/unstructured/ingest/connector/google_drive.py b/unstructured/ingest/connector/google_drive.py deleted file mode 100644 index e3b0f931c..000000000 --- a/unstructured/ingest/connector/google_drive.py +++ /dev/null @@ -1,348 +0,0 @@ -import io -import json -import os -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from mimetypes import guess_extension -from pathlib import Path - -from unstructured.file_utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSessionHandle, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from googleapiclient.discovery import Resource as GoogleAPIResource - from googleapiclient.http import MediaIoBaseDownload - -FILE_FORMAT = "{id}-{name}{ext}" -DIRECTORY_FORMAT = "{id}-{name}" - - -@dataclass -class GoogleDriveSessionHandle(BaseSessionHandle): - service: "GoogleAPIResource" - - -@requires_dependencies(["googleapiclient"], extras="google-drive") -def create_service_account_object(key_path: t.Union[str, dict], id=None): - """ - Creates a service object for interacting with Google Drive. - - Providing a drive id enforces a key validation process. - - Args: - key_path: Path to Google Drive service account json file. (or the actual json) - id: ID of a file on Google Drive. File has to be either publicly accessible or accessible - to the service account. 
- - Returns: - Service account object - """ - from google.auth import default, exceptions - from google.oauth2 import service_account - from googleapiclient.discovery import build - from googleapiclient.errors import HttpError - - # Service account key can be a dict or a file path(str) - # But the dict may come in as a string - key_path = json_to_dict(key_path) - - try: - if isinstance(key_path, dict): - creds = service_account.Credentials.from_service_account_info(key_path) - elif isinstance(key_path, str): - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path - creds, _ = default() - else: - raise ValueError( - f"key path not recognized as a dictionary or a file path: " - f"[{type(key_path)}] {key_path}", - ) - service = build("drive", "v3", credentials=creds) - - if id: - service.files().list( - spaces="drive", - fields="files(id)", - pageToken=None, - corpora="user", - q=f"'{id}' in parents", - ).execute() - - except HttpError as exc: - raise ValueError(f"{exc.reason}") - except exceptions.DefaultCredentialsError: - raise ValueError("The provided API key is invalid.") - - return service - - -@dataclass -class GoogleDriveAccessConfig(AccessConfig): - service_account_key: t.Union[str, dict] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleGoogleDriveConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - """Connector config where drive_id is the id of the document to process or - the folder to process all documents from.""" - - # Google Drive Specific Options - drive_id: str - access_config: GoogleDriveAccessConfig - extension: t.Optional[str] = None - recursive: bool = False - - def create_session_handle( - self, - ) -> GoogleDriveSessionHandle: - service = create_service_account_object(self.access_config.service_account_key) - return GoogleDriveSessionHandle(service=service) - - -@dataclass -class GoogleDriveIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleGoogleDriveConfig - meta: t.Dict[str, str] = field(default_factory=dict) - registry_name: str = "google_drive" - - @property - def filename(self): - return Path(self.meta.get("download_filepath")).resolve() # type: ignore - - @property - def _output_filename(self): - return Path(f"{self.meta.get('output_filepath')}.json").resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "drive_id": self.connector_config.drive_id, - "file_id": self.meta["id"], - } - - @requires_dependencies(["googleapiclient"], extras="google-drive") - def update_source_metadata(self): - from googleapiclient.errors import HttpError - - try: - file_obj = ( - self.session_handle.service.files() - .get( - fileId=self.meta["id"], - fields="id, createdTime, modifiedTime, version, webContentLink", - ) - .execute() - ) - except HttpError as e: - if e.status_code == 404: - logger.error(f"File {self.meta['name']} not found") - self.source_metadata = SourceMetadata( - exists=True, - ) - return - raise - - date_created = None - if dc := file_obj.get("createdTime", ""): - date_created = datetime.strptime( - dc, - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - date_modified = None - if dm := file_obj.get("modifiedTime", ""): - date_modified = datetime.strptime( - dm, - "%Y-%m-%dT%H:%M:%S.%fZ", - ).isoformat() - - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - version=file_obj.get("version", ""), - source_url=file_obj.get("webContentLink", ""), - exists=True, - ) - - @SourceConnectionNetworkError.wrap - 
def _run_downloader(self, downloader: "MediaIoBaseDownload") -> bool: - downloaded = False - while downloaded is False: - _, downloaded = downloader.next_chunk() - return downloaded - - @requires_dependencies(["googleapiclient"], extras="google-drive") - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - from googleapiclient.http import MediaIoBaseDownload - - if self.meta.get("mimeType", "").startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get( - self.meta.get("mimeType"), # type: ignore - ) - if not export_mime: - logger.info( - f"File not supported. Name: {self.meta.get('name')} " - f"ID: {self.meta.get('id')} " - f"MimeType: {self.meta.get('mimeType')}", - ) - return - - request = self.session_handle.service.files().export_media( - fileId=self.meta.get("id"), - mimeType=export_mime, - ) - else: - request = self.session_handle.service.files().get_media(fileId=self.meta.get("id")) - file = io.BytesIO() - downloader = MediaIoBaseDownload(file, request) - self.update_source_metadata() - downloaded = self._run_downloader(downloader=downloader) - - saved = False - if downloaded and file: - dir_ = Path(self.meta["download_dir"]) - if dir_: - if not dir_.is_dir(): - logger.debug(f"Creating directory: {self.meta.get('download_dir')}") - - if dir_: - dir_.mkdir(parents=True, exist_ok=True) - - with open(self.filename, "wb") as handler: - handler.write(file.getbuffer()) - saved = True - logger.debug(f"File downloaded: {self.filename}.") - if not saved: - logger.error(f"Error while downloading and saving file: {self.filename}.") - - def write_result(self): - """Write the structured json result for this doc. result must be json serializable.""" - if self.read_config.download_only: - return - self._output_filename.parent.mkdir(parents=True, exist_ok=True) - with open(self._output_filename, "w") as output_f: - output_f.write(json.dumps(self.isd_elems_no_filename, ensure_ascii=False, indent=2)) - logger.info(f"Wrote {self._output_filename}") - - -@dataclass -class GoogleDriveSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching documents from Google Drive""" - - connector_config: SimpleGoogleDriveConfig - - def _list_objects(self, drive_id, recursive=False): - files = [] - service = self.connector_config.create_session_handle().service - - def traverse(drive_id, download_dir, output_dir, recursive=False): - page_token = None - while True: - response = ( - service.files() - .list( - spaces="drive", - fields="nextPageToken, files(id, name, mimeType)", - pageToken=page_token, - corpora="user", - q=f"'{drive_id}' in parents", - ) - .execute() - ) - - for meta in response.get("files", []): - if meta.get("mimeType") == "application/vnd.google-apps.folder": - dir_ = DIRECTORY_FORMAT.format(name=meta.get("name"), id=meta.get("id")) - if recursive: - download_sub_dir = (download_dir / dir_).resolve() - output_sub_dir = (output_dir / dir_).resolve() - traverse(meta.get("id"), download_sub_dir, output_sub_dir, True) - else: - ext = "" - if not Path(meta.get("name")).suffixes: - guess = guess_extension(meta.get("mimeType")) - ext = guess if guess else ext - - if meta.get("mimeType", "").startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get(meta.get("mimeType")) - if not export_mime: - logger.info( - f"File {meta.get('name')} has an " - f"unsupported MimeType {meta.get('mimeType')}", - ) - continue - - if not ext: - guess = 
guess_extension(export_mime) - ext = guess if guess else ext - - # TODO (Habeeb): Consider filtering at the query level. - if ( - self.connector_config.extension - and self.connector_config.extension != ext - ): # noqa: SIM102 - logger.debug( - f"File {meta.get('name')} does not match " - f"the file type {self.connector_config.extension}", - ) - continue - - name = FILE_FORMAT.format(name=meta.get("name"), id=meta.get("id"), ext=ext) - meta["download_dir"] = str(download_dir) - meta["download_filepath"] = (download_dir / name).resolve().as_posix() - meta["output_dir"] = str(output_dir) - meta["output_filepath"] = (output_dir / name).resolve().as_posix() - files.append(meta) - - page_token = response.get("nextPageToken", None) - if page_token is None: - break - - traverse( - drive_id, - Path(self.read_config.download_dir), - Path(self.processor_config.output_dir), - recursive, - ) - return files - - def initialize(self): - pass - - def check_connection(self): - try: - self.connector_config.create_session_handle().service - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - files = self._list_objects(self.connector_config.drive_id, self.connector_config.recursive) - return [ - GoogleDriveIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - meta=file, - ) - for file in files - ] diff --git a/unstructured/ingest/connector/hubspot.py b/unstructured/ingest/connector/hubspot.py deleted file mode 100644 index 3f01f4e81..000000000 --- a/unstructured/ingest/connector/hubspot.py +++ /dev/null @@ -1,278 +0,0 @@ -import typing as t -from dataclasses import dataclass -from enum import Enum -from functools import reduce -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSessionHandle, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from hubspot import HubSpot - -CONTENT_TAG = "content" - - -class HubSpotObjectTypes(Enum): - CALLS = "calls" - COMMUNICATIONS = "communications" - EMAILS = "emails" - NOTES = "notes" - PRODUCTS = "products" - TICKETS = "tickets" - - -@dataclass -class HubSpotSessionHandle(BaseSessionHandle): - service: "HubSpot" - - -@dataclass -class HubSpotAccessConfig(AccessConfig): - api_token: str = enhanced_field(repr=False, sensitive=True) - - -@dataclass -class SimpleHubSpotConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - access_config: HubSpotAccessConfig - params: t.Optional[str] = None - properties: t.Optional[dict] = None - object_types: t.Optional[t.List[str]] = None - custom_properties: t.Optional[t.Dict[str, t.List[str]]] = None - - @requires_dependencies(["hubspot"], extras="hubspot") - def create_session_handle(self) -> HubSpotSessionHandle: - from hubspot import HubSpot - - service = HubSpot(access_token=self.access_config.api_token) - return HubSpotSessionHandle(service=service) - - -@dataclass -class HubSpotIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc): - 
connector_config: SimpleHubSpotConfig - object_id: str - object_type: str - content_properties: t.List[str] - registry_name: str = "hubspot" - - def __post_init__(self): - self._add_custom_properties() - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / f"{self.object_type}/{self.object_id}.txt" # type: ignore - ).resolve() - - @property - def _output_filename(self): - return ( - Path(self.processor_config.output_dir) - / f"{self.object_type}/{self.object_id}.json" # type: ignore - ).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - f"{self.registry_name}_id": self.object_id, - } - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - def _add_custom_properties(self): - if (self.connector_config.custom_properties is not None) and ( - (cprops := self.connector_config.custom_properties.get(self.object_type)) is not None - ): - self.content_properties += cprops - - def _join_object_properties(self, obj) -> str: - return "\n".join( - [ - obj.properties[cprop] - for cprop in self.content_properties - if (obj.properties.get(cprop) is not None) - ], - ) - - def _resolve_getter(self): - method_path = "" - if self.object_type in [ - HubSpotObjectTypes.CALLS.value, - HubSpotObjectTypes.COMMUNICATIONS.value, - HubSpotObjectTypes.EMAILS.value, - HubSpotObjectTypes.NOTES.value, - ]: - method_path = f"crm.objects.{self.object_type}.basic_api.get_by_id" - if self.object_type in [ - HubSpotObjectTypes.PRODUCTS.value, - HubSpotObjectTypes.TICKETS.value, - ]: - method_path = f"crm.{self.object_type}.basic_api.get_by_id" - - method = reduce(getattr, method_path.split("."), self.session_handle.service) - return method - - @requires_dependencies(["hubspot"], extras="hubspot") - def _fetch_obj(self, check_only=False): - from hubspot.crm.objects.exceptions import NotFoundException - - get_by_id_method = self._resolve_getter() - try: - response = get_by_id_method( - self.object_id, - properties=([] if check_only else self.content_properties), - ) - except NotFoundException as e: - logger.error(e) - return None - return response - - def update_source_metadata(self, **kwargs) -> None: - obj = kwargs.get("object", self._fetch_obj(check_only=True)) # type: ignore - if obj is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=obj.created_at.isoformat(), - date_modified=obj.updated_at.isoformat(), - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - obj = self._fetch_obj() - if obj is None: - raise ValueError( - f"Failed to retrieve object {self.registry_name}", - f"with ID {self.object_id}", - ) - self.update_source_metadata(object=obj) - output = self._join_object_properties(obj) - self.filename.parent.mkdir(parents=True, exist_ok=True) - with open(self.filename, "w", encoding="utf8") as f: - f.write(output) - return - - -@dataclass -class HubSpotSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleHubSpotConfig - - def initialize(self): - self.hubspot = self.connector_config.create_session_handle().service - - def check_connection(self): - return self.connector_config.create_session_handle().service - - @requires_dependencies(["hubspot"], extras="hubspot") - def _list_objects(self, get_page_method, object_type: str, content_properties: t.List[str]): - try: - 
objects = get_page_method() - except Exception as e: - logger.error(e) - logger.error( - f"Failed to retrieve {object_type}, omitting processing...", - ) - return [] - return [ - HubSpotIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - object_id=obj.id, - object_type=object_type, - content_properties=content_properties, - ) - for obj in objects.results - ] - - def _get_calls(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.calls.basic_api.get_page, - HubSpotObjectTypes.CALLS.value, - ["hs_call_title", "hs_call_body"], - ) - - def _get_communications(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.communications.basic_api.get_page, - HubSpotObjectTypes.COMMUNICATIONS.value, - ["hs_communication_body"], - ) - - def _get_emails(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.emails.basic_api.get_page, - HubSpotObjectTypes.EMAILS.value, - ["hs_email_subject", "hs_email_text"], - ) - - def _get_notes(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.objects.notes.basic_api.get_page, - HubSpotObjectTypes.NOTES.value, - ["hs_note_body"], - ) - - def _get_products(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.products.basic_api.get_page, - HubSpotObjectTypes.PRODUCTS.value, - ["description"], - ) - - def _get_tickets(self) -> t.List[HubSpotIngestDoc]: - return self._list_objects( - self.hubspot.crm.tickets.basic_api.get_page, - HubSpotObjectTypes.TICKETS.value, - ["subject", "content"], - ) - - def get_ingest_docs(self): - obj_method_resolver = { - HubSpotObjectTypes.CALLS.value: self._get_calls, - HubSpotObjectTypes.COMMUNICATIONS.value: self._get_communications, - HubSpotObjectTypes.EMAILS.value: self._get_emails, - HubSpotObjectTypes.NOTES.value: self._get_notes, - HubSpotObjectTypes.PRODUCTS.value: self._get_products, - HubSpotObjectTypes.TICKETS.value: self._get_tickets, - } - - if self.connector_config.object_types is not None: - obj_method_resolver = { - obj_name: obj_method_resolver.get(obj_name) # type: ignore - for obj_name in self.connector_config.object_types - } - - ingest_docs: t.List[HubSpotIngestDoc] = [] - for obj_name, obj_method in obj_method_resolver.items(): - logger.info(f"Retrieving - {obj_name}") - results: t.List[HubSpotIngestDoc] = obj_method() # type: ignore - ingest_docs += results # type: ignore - - return ingest_docs diff --git a/unstructured/ingest/connector/jira.py b/unstructured/ingest/connector/jira.py deleted file mode 100644 index d29e1f2dc..000000000 --- a/unstructured/ingest/connector/jira.py +++ /dev/null @@ -1,469 +0,0 @@ -import math -import typing as t -from collections import abc -from dataclasses import dataclass, field -from datetime import datetime -from functools import cached_property -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSessionHandle, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from 
atlassian import Jira
-
-
-@dataclass
-class JiraSessionHandle(BaseSessionHandle):
-    service: "Jira"
-
-
-@requires_dependencies(["atlassian"], extras="jira")
-def create_jira_object(url, user_email, api_token):
-    """
-    Creates a Jira object for interacting with Jira Cloud.
-    Args:
-        url: URL to Jira Cloud organization
-        user_email: Email for the user with the permissions
-        api_token: API Token, generated for the user
-
-    Returns:
-        Jira object
-    """
-    from atlassian import Jira
-
-    jira = Jira(
-        url,
-        username=user_email,
-        password=api_token,
-    )
-
-    response = jira.get_permissions("BROWSE_PROJECTS")
-    permitted = response["permissions"]["BROWSE_PROJECTS"]["havePermission"]
-
-    if permitted:
-        return jira
-
-    else:
-        raise ValueError(
-            """The user with the provided *user_email* and *api_token*
-            is not permitted to browse projects in the Jira organization
-            at the provided *url*. Try checking the user_email, api_token,
-            and url arguments.""",
-        )
-
-
-@dataclass
-class JiraAccessConfig(AccessConfig):
-    api_token: str = enhanced_field(sensitive=True)
-
-
-@dataclass
-class SimpleJiraConfig(ConfigSessionHandleMixin, BaseConnectorConfig):
-    """Connector config where:
-    user_email is the email used to authenticate into Atlassian (Jira) Cloud,
-    api_token is the API token used to authenticate into Atlassian (Jira) Cloud,
-    url is the URL pointing to the Atlassian (Jira) Cloud instance, and
-    projects is the list of projects to be ingested.
-
-    Check ...
-    for more info on the api_token.
-    """
-
-    user_email: str
-    access_config: JiraAccessConfig
-    url: str
-    projects: t.Optional[t.List[str]] = None
-    boards: t.Optional[t.List[str]] = None
-    issues: t.Optional[t.List[str]] = None
-
-    def create_session_handle(
-        self,
-    ) -> JiraSessionHandle:
-        service = create_jira_object(
-            url=self.url, user_email=self.user_email, api_token=self.access_config.api_token
-        )
-        return JiraSessionHandle(service=service)
-
-
-@dataclass
-class JiraFileMeta:
-    """Metadata specifying:
-    project_id: id of the Jira project the issue belongs to, and
-    issue_key: key of the issue being fetched.
-    """
-
-    project_id: str
-    board_id: t.Optional[str]
-    issue_key: str
-    issue_id: str
-
-
-# An implementation to obtain nested-defaultdict functionality.
-# Keys have default values in a recursive manner, allowing
-# templates of any depth to parse an API response object.
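Editorial aside: the comment above describes the pattern the next two helpers
(nested_object_to_field_getter and FieldGetter) implement. A self-contained
sketch of the same idea, with a toy payload; the names here are illustrative,
not the connector's own:

    class SafeDict(dict):
        # A missing key yields an empty SafeDict instead of raising KeyError,
        # so arbitrarily deep template lookups degrade gracefully.
        def __getitem__(self, key):
            value = super().__getitem__(key) if key in self else None
            return SafeDict() if value is None else value

    def wrap(obj):
        # Recursively wrap mappings so every nesting level is lookup-safe.
        if isinstance(obj, dict):
            return SafeDict({k: wrap(v) for k, v in obj.items()})
        return obj

    fields = wrap({"assignee": {"displayName": "Ada"}})
    print(fields["assignee"]["displayName"])   # -> Ada
    print(fields["reporter"]["emailAddress"])  # -> {} rather than a KeyError

The connector's own implementation follows.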
-def nested_object_to_field_getter(object): - if isinstance(object, abc.Mapping): - new_object = {} - for k, v in object.items(): - if isinstance(v, abc.Mapping): - new_object[k] = FieldGetter(nested_object_to_field_getter(v)) - else: - new_object[k] = v - return FieldGetter(new_object) - else: - return object - - -class FieldGetter(dict): - def __getitem__(self, key): - value = super().__getitem__(key) if key in self else None - if value is None: - value = FieldGetter({}) - return value - - -def form_templated_string(issue, parsed_fields, c_sep="|||", r_sep="\n\n\n"): - """Forms a template string via parsing the fields from the API response object on the issue - The template string will be saved to the disk, and then will be processed by partition.""" - return r_sep.join( - [ - _get_id_fields_for_issue(issue), - _get_project_fields_for_issue(parsed_fields), - _get_dropdown_fields_for_issue(parsed_fields), - _get_subtasks_for_issue(parsed_fields), - _get_comments_for_issue(parsed_fields), - _get_text_fields_for_issue(parsed_fields), - ], - ) - - -DEFAULT_C_SEP = " " * 5 -DEFAULT_R_SEP = "\n" - - -def _get_id_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - id, key = issue["id"], issue["key"] - return f"IssueID_IssueKey:{id}{c_sep}{key}{r_sep}" - - -def _get_project_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - if "project" in issue: - return ( - f"""ProjectID_Key:{issue["project"]["key"]}{c_sep}{issue["project"]["name"]}{r_sep}""" - ) - else: - return "" - - -def _get_dropdown_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return f""" - IssueType:{issue["issuetype"]["name"]} - {r_sep} - Status:{issue["status"]["name"]} - {r_sep} - Priority:{issue["priority"]} - {r_sep} - AssigneeID_Name:{issue["assignee"]["accountId"]}{c_sep}{issue["assignee"]["displayName"]} - {r_sep} - ReporterAdr_Name:{issue["reporter"]["emailAddress"]}{c_sep}{issue["reporter"]["displayName"]} - {r_sep} - Labels:{c_sep.join(issue["labels"])} - {r_sep} - Components:{c_sep.join([component["name"] for component in issue["components"]])} - {r_sep} - """ - - -def _get_subtasks_for_issue(issue): - return "" - - -def _get_text_fields_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return f""" - {issue["summary"]} - {r_sep} - {issue["description"]} - {r_sep} - {c_sep.join([atch["self"] for atch in issue["attachment"]])} - {r_sep} - """ - - -def _get_comments_for_issue(issue, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return c_sep.join( - [_get_fields_for_comment(comment) for comment in issue["comment"]["comments"]], - ) - - -def _get_fields_for_comment(comment, c_sep=DEFAULT_C_SEP, r_sep=DEFAULT_R_SEP): - return f"{comment['author']['displayName']}{c_sep}{comment['body']}{r_sep}" - - -def scroll_wrapper(func, results_key="results"): - def wrapper(*args, **kwargs): - """Wraps a function to obtain scroll functionality. 
-        Function needs to be able to accept 'start' and 'limit' arguments."""
-        if "number_of_items_to_fetch" in kwargs:
-            number_of_items_to_fetch = kwargs["number_of_items_to_fetch"]
-            del kwargs["number_of_items_to_fetch"]
-        else:
-            number_of_items_to_fetch = 100
-
-        kwargs["limit"] = min(100, number_of_items_to_fetch)
-        kwargs["start"] = kwargs.get("start", 0)
-
-        all_results = []
-        num_iterations = math.ceil(number_of_items_to_fetch / kwargs["limit"])
-
-        for _ in range(num_iterations):
-            response = func(*args, **kwargs)
-            if isinstance(response, list):
-                all_results += response
-            elif isinstance(response, dict):
-                if results_key not in response:
-                    raise KeyError(
-                        "Response object has no known keys to \
-                        access the results, such as 'results' or 'values'.",
-                    )
-                all_results += response[results_key]
-            kwargs["start"] += kwargs["limit"]
-
-        return all_results[:number_of_items_to_fetch]
-
-    return wrapper
-
-
-@dataclass
-class JiraIngestDoc(IngestDocSessionHandleMixin, IngestDocCleanupMixin, BaseSingleIngestDoc):
-    """Class encapsulating fetching a doc and writing processed results (but not
-    doing the processing).
-
-    The current implementation creates a Jira connection object
-    to fetch each doc, rather than creating one for each thread.
-    """
-
-    connector_config: SimpleJiraConfig
-    file_meta: t.Optional[JiraFileMeta] = None
-    registry_name: str = "jira"
-
-    @cached_property
-    def record_locator(self):  # Values must be JSON-serializable
-        """A dictionary with any data necessary to uniquely identify the document on
-        the source system."""
-        return {
-            "base_url": self.connector_config.url,
-            "issue_key": self.file_meta.issue_key,
-        }
-
-    @cached_property
-    @SourceConnectionNetworkError.wrap
-    def issue(self):
-        """Gets issue data"""
-        jira = self.session_handle.service
-        return jira.issue(self.file_meta.issue_key)
-
-    @cached_property
-    def parsed_fields(self):
-        return nested_object_to_field_getter(self.issue["fields"])
-
-    @property
-    def grouping_folder_name(self):
-        if self.file_meta.board_id:
-            return self.file_meta.board_id
-        else:
-            return self.file_meta.project_id
-
-    @property
-    def filename(self):
-        download_file = f"{self.file_meta.issue_id}.txt"
-
-        return (
-            Path(self.read_config.download_dir) / self.grouping_folder_name / download_file
-        ).resolve()
-
-    @property
-    def _output_filename(self):
-        """Create output file path."""
-        output_file = f"{self.file_meta.issue_id}.json"
-
-        return (
-            Path(self.processor_config.output_dir) / self.grouping_folder_name / output_file
-        ).resolve()
-
-    @property
-    def version(self) -> t.Optional[str]:
-        return None
-
-    def update_source_metadata(self, **kwargs) -> None:
-        exists = bool(self.issue)
-        if not exists:
-            self.source_metadata = SourceMetadata(
-                exists=exists,
-            )
-            return
-
-        self.source_metadata = SourceMetadata(
-            date_created=datetime.strptime(
-                self.parsed_fields["created"],
-                "%Y-%m-%dT%H:%M:%S.%f%z",
-            ).isoformat(),
-            date_modified=datetime.strptime(
-                self.parsed_fields["updated"],
-                "%Y-%m-%dT%H:%M:%S.%f%z",
-            ).isoformat(),
-            source_url=f"{self.connector_config.url}/browse/{self.file_meta.issue_key}",
-            exists=exists,
-        )
-
-    @SourceConnectionError.wrap
-    @requires_dependencies(["atlassian"], extras="jira")
-    @BaseSingleIngestDoc.skip_if_file_exists
-    def get_file(self):
-        document = form_templated_string(self.issue, self.parsed_fields)
-        self.update_source_metadata()
-        self.filename.parent.mkdir(parents=True, exist_ok=True)
-
-        with open(self.filename, "w", encoding="utf8") as f:
-
f.write(document) - - -@dataclass -class JiraSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Fetches issues from projects in an Atlassian (Jira) Cloud instance.""" - - connector_config: SimpleJiraConfig - _jira: t.Optional["Jira"] = field(init=False, default=None) - - @property - def jira(self) -> "Jira": - if self._jira is None: - try: - self._jira = self.connector_config.create_session_handle().service - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - return self._jira - - @requires_dependencies(["atlassian"], extras="jira") - def initialize(self): - _ = self.jira - - def check_connection(self): - _ = self.jira - - @requires_dependencies(["atlassian"], extras="jira") - def _get_all_project_ids(self): - """Fetches ids for all projects in a Jira domain.""" - project_ids = [project["key"] for project in self.jira.projects()] - return project_ids - - @requires_dependencies(["atlassian"], extras="jira") - def _get_issues_within_one_project( - self, - project_id: str, - ): - get_issues_with_scroll = scroll_wrapper(self.jira.get_all_project_issues) - results = get_issues_with_scroll(project=project_id, fields=["key"]) - - return [(issue["key"], issue["id"], None) for issue in results] - - @requires_dependencies(["atlassian"], extras="jira") - def _get_issue_keys_within_projects(self, project_ids: t.Optional[t.List[str]] = None): - if project_ids is None: - # for when a component list is provided, without any projects - if bool(self.connector_config.boards or self.connector_config.issues): - return [] - # for when no components are provided. all projects will be ingested - else: - return self._get_all_project_ids() - - # for when a component list is provided, including some projects - issue_keys_all = [self._get_issues_within_one_project(project_id=id) for id in project_ids] - - issue_keys_flattened = [ - (issue_key, issue_id, None) - for issue_keys_project in issue_keys_all - for issue_key, issue_id, board_id in issue_keys_project - ] - - return issue_keys_flattened - - def _get_issues_within_one_board(self, board_id: str): - get_issues_with_scroll = scroll_wrapper( - self.jira.get_issues_for_board, - results_key="issues", - ) - results = get_issues_with_scroll(board_id=board_id, fields=["key"], jql=None) - - return [(issue["key"], issue["id"], board_id) for issue in results] - - def _get_issue_keys_within_boards(self, board_ids): - if board_ids is None: - return [] - - issue_keys_all = [self._get_issues_within_one_board(board_id=id) for id in board_ids] - - issue_keys_flattened = [ - (issue_key, issue_id, board_id) - for issue_keys_board in issue_keys_all - for issue_key, issue_id, board_id in issue_keys_board - ] - return issue_keys_flattened - - def get_issues_info(self, issues): - issues_info = [self.jira.get_issue(issue, ["key", "id"]) for issue in issues] - return [(info["key"], info["id"], None) for info in issues_info] - - def get_issue_keys_for_given_components(self): - issues = [] - - if self.connector_config.projects: - issues += self._get_issue_keys_within_projects(self.connector_config.projects) - if self.connector_config.boards: - issues += self._get_issue_keys_within_boards(self.connector_config.boards) - if self.connector_config.issues: - issues += self.get_issues_info(self.connector_config.issues) - - return issues - - def get_ingest_docs(self): - """Fetches all issues in a project.""" - if bool( - self.connector_config.projects - or 
self.connector_config.boards - or self.connector_config.issues, - ): - issue_keys_and_ids = self.get_issue_keys_for_given_components() - else: - # gets all issue ids from all projects - issue_keys_and_ids = self._get_issue_keys_within_projects() - - return [ - JiraIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - file_meta=JiraFileMeta( - issue_id=issue_id, - issue_key=issue_key, - project_id=issue_key.split("-")[0], - board_id=board_id, - ), - ) - for issue_key, issue_id, board_id in issue_keys_and_ids - ] diff --git a/unstructured/ingest/connector/kafka.py b/unstructured/ingest/connector/kafka.py deleted file mode 100644 index 4510cf3d7..000000000 --- a/unstructured/ingest/connector/kafka.py +++ /dev/null @@ -1,294 +0,0 @@ -import base64 -import json -import socket -import typing as t -from dataclasses import dataclass -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDoc, - BaseSingleIngestDoc, - BaseSourceConnector, - ConfigSessionHandleMixin, - IngestDocCleanupMixin, - IngestDocSessionHandleMixin, - SourceConnectorCleanupMixin, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from confluent_kafka import Consumer, Producer - - -@dataclass -class KafkaAccessConfig(AccessConfig): - kafka_api_key: t.Optional[str] = enhanced_field(sensitive=True) - secret: t.Optional[str] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleKafkaConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - bootstrap_server: str - port: str - topic: str - access_config: KafkaAccessConfig - confluent: t.Optional[bool] = True - num_messages_to_consume: t.Optional[int] = 1 - timeout: t.Optional[float] = 1.0 - - -@dataclass -class KafkaIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a message and writing processed results.""" - - connector_config: SimpleKafkaConfig - raw_content: str - raw_filename: str - registry_name: str = "kafka" - - def _tmp_download_file(self): - topic_file = self.connector_config.topic + "-" + self.raw_filename - return Path(self.read_config.download_dir) / topic_file - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - @property - def filename(self): - """The filename of the file created""" - return self._tmp_download_file() - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @property - def _output_filename(self): - """Create filename document id combined with a hash of the query to uniquely identify - the output file.""" - output_file = self.connector_config.topic + ".json" - return Path(self.processor_config.output_dir) / output_file - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - self._create_full_tmp_dir_path() - - pdf_data = base64.b64decode(self.raw_content) - - with open(self.filename, "wb") as file: - file.write(pdf_data) - - -@dataclass -class KafkaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Source connector for 
Kafka.
-    Main job is to consume from a Kafka topic and create instances of
-    KafkaIngestDoc.
-    Note that messages have the format of:
-        "filename": the name of the file (with the correct file extension)
-        "content": the file content, base64 encoded (whether or not it was binary)
-    """
-
-    connector_config: SimpleKafkaConfig
-    _consumer: t.Optional["Consumer"] = None
-
-    def check_connection(self):
-        try:
-            self.kafka_consumer
-        except Exception as e:
-            logger.error(f"failed to validate connection: {e}", exc_info=True)
-            raise SourceConnectionError(f"failed to validate connection: {e}")
-
-    def initialize(self):
-        topic = self.connector_config.topic
-        logger.info(f"Subscribing to topic: {topic}")
-        self.kafka_consumer.subscribe([topic])
-
-    @property
-    def kafka_consumer(self):
-        if self._consumer is None:
-            self._consumer = self.create_consumer()
-        return self._consumer
-
-    @requires_dependencies(["confluent_kafka"], extras="kafka")
-    def create_consumer(self) -> "Consumer":
-        from confluent_kafka import Consumer
-
-        is_confluent = self.connector_config.confluent
-        bootstrap = self.connector_config.bootstrap_server
-        port = self.connector_config.port
-
-        conf = {
-            "bootstrap.servers": f"{bootstrap}:{port}",
-            "client.id": socket.gethostname(),
-            "group.id": "your_group_id",
-            "enable.auto.commit": "false",
-            "auto.offset.reset": "earliest",
-            "message.max.bytes": 10485760,
-        }
-
-        if is_confluent:
-            kafka_api_key = self.connector_config.access_config.kafka_api_key
-            secret = self.connector_config.access_config.secret
-            conf["sasl.mechanism"] = "PLAIN"
-            conf["security.protocol"] = "SASL_SSL"
-            conf["sasl.username"] = kafka_api_key
-            conf["sasl.password"] = secret
-
-        consumer = Consumer(conf)
-        logger.debug(f"Kafka Consumer connected to bootstrap: {bootstrap}")
-        return consumer
-
-    @SourceConnectionError.wrap
-    def get_ingest_docs(self):
-        from confluent_kafka import KafkaError
-
-        consumer = self.kafka_consumer
-        running = True
-
-        collected = []
-        num_messages_to_consume = self.connector_config.num_messages_to_consume
-        logger.info(f"Config set for blocking on {num_messages_to_consume} messages")
-        # Consume the specified number of messages
-        while running:
-            msg = consumer.poll(timeout=self.connector_config.timeout)
-            if msg is None:
-                logger.debug("No Kafka messages found")
-                continue
-            if msg.error():
-                if msg.error().code() == KafkaError._PARTITION_EOF:
-                    # End of partition event
-                    logger.error(
-                        "%% %s [%d] reached end at offset %d\n"
-                        % (msg.topic(), msg.partition(), msg.offset())
-                    )
-            else:
-                collected.append(json.loads(msg.value().decode("utf8")))
-                if len(collected) >= num_messages_to_consume:
-                    logger.debug(f"Found {len(collected)} messages, stopping")
-                    consumer.commit(asynchronous=False)
-                    break
-
-        return [
-            KafkaIngestDoc(
-                connector_config=self.connector_config,
-                processor_config=self.processor_config,
-                read_config=self.read_config,
-                raw_filename=msg["filename"],
-                raw_content=msg["content"],
-            )
-            for msg in collected
-        ]
-
-
-@dataclass
-class KafkaWriteConfig(WriteConfig):
-    batch_size: int = 4
-
-
-@dataclass
-class KafkaDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector):
-    """Connector to write BaseIngestDoc types to Kafka.
-    Writes messages to Kafka in the format:
-        "type": the element type
-        "text": the element text
-        "filename": the source document's filename
-    """
-
-    write_config: KafkaWriteConfig
-    connector_config: SimpleKafkaConfig
-    _producer: t.Optional["Producer"] = None
-
-    @property
-    def kafka_producer(self):
-        if self._producer is None:
-            self._producer = self.create_producer()
-        return self._producer
-
-    def initialize(self):
-
pass - - @requires_dependencies(["confluent_kafka"], extras="kafka") - def create_producer(self) -> "Producer": - from confluent_kafka import Producer - - is_confluent = self.connector_config.confluent - bootstrap = self.connector_config.bootstrap_server - port = self.connector_config.port - - conf = { - "bootstrap.servers": f"{bootstrap}:{port}", - "client.id": socket.gethostname(), - } - - if is_confluent: - api_key = self.connector_config.access_config.kafka_api_key - secret = self.connector_config.access_config.secret - conf["sasl.mechanism"] = "PLAIN" - conf["security.protocol"] = "SASL_SSL" - conf["sasl.username"] = api_key - conf["sasl.password"] = secret - - producer = Producer(conf) - logger.debug(f"Connected to bootstrap: {bootstrap}") - return producer - - def check_connection(self): - try: - self.kafka_producer - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - @DestinationConnectionError.wrap - def upload_msg(self, batch) -> int: - logger.debug(f"Uploading batch: {batch}") - topic = self.connector_config.topic - producer = self.kafka_producer - uploaded = 0 - for i in range(len(batch)): - filename = f'{batch[i].pop("filename")}' - producer.produce(topic, key=filename, value=str(batch[i])) - uploaded += 1 - return uploaded - - @DestinationConnectionError.wrap - def write_dict(self, *args, dict_list: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Writing {len(dict_list)} documents to Kafka") - num_uploaded = 0 - - for chunk in batch_generator(dict_list, self.write_config.batch_size): - num_uploaded += self.upload_msg(chunk) # noqa: E203 - - producer = self.kafka_producer - producer.flush() - logger.info(f"Uploaded {num_uploaded} documents to Kafka") - - def write(self, docs: t.List[BaseIngestDoc]) -> None: - content_list: t.List[t.Dict[str, t.Any]] = [] - for doc in docs: - local_path = doc._output_filename - with open(local_path) as json_file: - dict_content = json.load(json_file) - for content in dict_content: - content_list.append( - { - "type": content["type"], - "text": content["text"], - "filename": content["metadata"]["filename"], - } - ) - self.write_dict(dict_list=content_list) diff --git a/unstructured/ingest/connector/local.py b/unstructured/ingest/connector/local.py deleted file mode 100644 index 417828606..000000000 --- a/unstructured/ingest/connector/local.py +++ /dev/null @@ -1,139 +0,0 @@ -import fnmatch -import glob -import os -import typing as t -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - SourceMetadata, -) -from unstructured.ingest.logger import logger - - -@dataclass -class SimpleLocalConfig(BaseConnectorConfig): - # Local specific options - input_path: str - recursive: bool = False - file_glob: t.Optional[t.List[str]] = None - - def __post_init__(self): - if os.path.isfile(self.input_path): - self.input_path_is_file = True - else: - self.input_path_is_file = False - - -@dataclass -class LocalIngestDoc(BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). 
- """ - - connector_config: SimpleLocalConfig - path: str - registry_name: str = "local" - - @property - def base_filename(self) -> t.Optional[str]: - download_path = Path(self.connector_config.input_path).resolve() - full_path = Path(self.filename).resolve() - if download_path.is_file(): - download_path = download_path.parent - relative_path = full_path.relative_to(download_path) - return str(relative_path) - - @property - def filename(self): - """The filename of the local file to be processed""" - return Path(self.path) - - def cleanup_file(self): - """Not applicable to local file system""" - - def get_file(self): - """Not applicable to local file system""" - - def update_source_metadata(self, **kwargs) -> None: - try: - out = os.lstat(self.path) - self._source_metadata = SourceMetadata( - exists=True, - date_created=str(datetime.fromtimestamp(out.st_ctime)), - date_modified=str(datetime.fromtimestamp(out.st_mtime)), - permissions_data=[{"mode": out.st_mode}], - source_url=self.path, - ) - except FileNotFoundError: - self._source_metadata = SourceMetadata(exists=False) - - @property - def _output_filename(self) -> Path: - """Returns output filename for the doc - If input path argument is a file itself, it returns the filename of the doc. - If input path argument is a folder, it returns the relative path of the doc. - """ - input_path = Path(self.connector_config.input_path) - basename = ( - f"{self.base_filename}.json" - if input_path.is_file() - else f"{Path(self.path).relative_to(input_path)}.json" - ) - return Path(self.processor_config.output_dir) / basename - - -@dataclass -class LocalSourceConnector(BaseSourceConnector): - """Objects of this class support fetching document(s) from local file system""" - - def check_connection(self): - pass - - connector_config: SimpleLocalConfig - - def __post_init__(self): - self.ingest_doc_cls: t.Type[LocalIngestDoc] = LocalIngestDoc - - def cleanup(self, cur_dir=None): - """Not applicable to local file system""" - - def initialize(self): - """Not applicable to local file system""" - - def _list_files(self): - if self.connector_config.input_path_is_file: - return glob.glob(f"{self.connector_config.input_path}") - elif self.connector_config.recursive: - return glob.glob( - f"{self.connector_config.input_path}/**", - recursive=self.connector_config.recursive, - ) - else: - return glob.glob(f"{self.connector_config.input_path}/*") - - def does_path_match_glob(self, path: str) -> bool: - if self.connector_config.file_glob is None: - return True - patterns = self.connector_config.file_glob - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False - - def get_ingest_docs(self): - return [ - self.ingest_doc_cls( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - path=file, - ) - for file in self._list_files() - if os.path.isfile(file) and self.does_path_match_glob(file) - ] diff --git a/unstructured/ingest/connector/mongodb.py b/unstructured/ingest/connector/mongodb.py deleted file mode 100644 index ae73ecbec..000000000 --- a/unstructured/ingest/connector/mongodb.py +++ /dev/null @@ -1,284 +0,0 @@ -import copy -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.__version__ import __version__ as unstructured_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from 
unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDocBatch, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from pymongo import MongoClient - - -SERVER_API_VERSION = "1" - - -def parse_userinfo(userinfo: str) -> t.Tuple[str, str]: - user, _, passwd = userinfo.partition(":") - return user, passwd - - -@dataclass -class MongoDBAccessConfig(AccessConfig): - uri: t.Optional[str] = enhanced_field(sensitive=True, default=None) - - -@dataclass -class SimpleMongoDBConfig(BaseConnectorConfig): - access_config: MongoDBAccessConfig - host: t.Optional[str] = None - database: t.Optional[str] = None - collection: t.Optional[str] = None - port: int = 27017 - batch_size: int = 100 - - @requires_dependencies(["pymongo"], extras="mongodb") - def generate_client(self) -> "MongoClient": - from pymongo import MongoClient - from pymongo.driver_info import DriverInfo - from pymongo.server_api import ServerApi - - if self.access_config.uri: - return MongoClient( - self.access_config.uri, - server_api=ServerApi(version=SERVER_API_VERSION), - driver=DriverInfo(name="unstructured", version=unstructured_version), - ) - else: - return MongoClient( - host=self.host, - port=self.port, - server_api=ServerApi(version=SERVER_API_VERSION), - ) - - def get_collection(self, client): - database = client[self.database] - return database.get_collection(name=self.collection) - - -@dataclass -class MongoDBDocumentMeta: - collection: str - document_id: str - date_created: str - - -@dataclass -class MongoDBIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleMongoDBConfig - document_meta: MongoDBDocumentMeta - document: dict = field(default_factory=dict) - registry_name: str = "mongodb" - - @property - def filename(self): - return ( - Path(self.read_config.download_dir) - / self.connector_config.collection - / f"{self.document_meta.document_id}.txt" - ).resolve() - - @property - def _output_filename(self): - return ( - Path(self.processor_config.output_dir) - / self.connector_config.collection - / f"{self.document_meta.document_id}.json" - ) - - def update_source_metadata(self, **kwargs): - if self.document is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=self.document_meta.date_created, - exists=True, - ) - - @SourceConnectionError.wrap - @requires_dependencies(["pymongo"], extras="mongodb") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - pass - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "host": self.connector_config.host, - "collection": self.connector_config.collection, - "document_id": self.document_meta.document_id, - } - - -@dataclass -class MongoDBIngestDocBatch(BaseIngestDocBatch): - connector_config: SimpleMongoDBConfig - ingest_docs: t.List[MongoDBIngestDoc] = field(default_factory=list) - list_of_ids: t.List[str] = field(default_factory=list) - registry_name: str = "mongodb_batch" - - @property - def unique_id(self) -> str: - return ",".join(sorted(self.list_of_ids)) - 
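Editorial aside: _get_docs below resolves one batch of ids with a single $in
query; the id-batching that produces those batches can be sketched standalone
(a hedged illustration; the helper name and toy values are ours, while
batch_size mirrors the field on SimpleMongoDBConfig above):

    from typing import Iterator, List

    def chunk_ids(ids: List[str], batch_size: int) -> Iterator[List[str]]:
        # Yield consecutive slices of at most batch_size ids; the source
        # connector builds one MongoDBIngestDocBatch per slice.
        for start in range(0, len(ids), batch_size):
            yield ids[start : start + batch_size]

    assert list(chunk_ids(["a", "b", "c"], 2)) == [["a", "b"], ["c"]]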
- @requires_dependencies(["pymongo"], extras="mongodb") - def _get_docs(self) -> t.List[dict]: - """Fetches all documents in a collection.""" - from bson.objectid import ObjectId - - # Note for future. Maybe this could use other client - client = self.connector_config.generate_client() - collection = self.connector_config.get_collection(client) - # MondoDB expects a list of ObjectIds - list_of_object_ids = [] - for x in self.list_of_ids: - list_of_object_ids.append(ObjectId(x)) - return list(collection.find({"_id": {"$in": list_of_object_ids}})) - - def get_files(self): - documents = self._get_docs() - for doc in documents: - ingest_doc = MongoDBIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - document_meta=MongoDBDocumentMeta( - collection=self.connector_config.collection, - document_id=str(doc.get("_id")), - date_created=doc.get("_id").generation_time.isoformat(), - ), - document=doc, - ) - ingest_doc.update_source_metadata() - del doc["_id"] - filename = ingest_doc.filename - flattened_dict = flatten_dict(dictionary=doc) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w", encoding="utf8") as f: - f.write(concatenated_values) - - self.ingest_docs.append(ingest_doc) - - -@dataclass -class MongoDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleMongoDBConfig - _client: t.Optional["MongoClient"] = field(init=False, default=None) - - @property - def client(self) -> "MongoClient": - if self._client is None: - self._client = self.connector_config.generate_client() - return self._client - - def check_connection(self): - try: - self.client.admin.command("ping") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - @requires_dependencies(["pymongo"], extras="mongodb") - def _get_doc_ids(self) -> t.List[str]: - """Fetches all document ids in a collection.""" - collection = self.connector_config.get_collection(self.client) - return [str(x) for x in collection.distinct("_id")] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with _get_doc_ids""" - ids = self._get_doc_ids() - id_batches = [ - ids[ - i - * self.connector_config.batch_size : (i + 1) # noqa - * self.connector_config.batch_size - ] - for i in range( - (len(ids) + self.connector_config.batch_size - 1) - // self.connector_config.batch_size - ) - ] - - return [ - MongoDBIngestDocBatch( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - list_of_ids=batched_ids, - ) - for batched_ids in id_batches - ] - - -@dataclass -class MongoDBDestinationConnector(BaseDestinationConnector): - connector_config: SimpleMongoDBConfig - _client: t.Optional["MongoClient"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @property - def client(self) -> 
"MongoClient": - if self._client is None: - self._client = self.connector_config.generate_client() - return self._client - - @requires_dependencies(["pymongo"], extras="mongodb") - def check_connection(self): - try: - self.client.admin.command("ping") - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def initialize(self): - _ = self.client - - @requires_dependencies(["pymongo"], extras="mongodb") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing {len(elements_dict)} documents to destination " - f"database {self.connector_config.database}, " - f"at collection {self.connector_config.collection}", - ) - - collection = self.connector_config.get_collection(self.client) - try: - collection.insert_many(elements_dict) - except Exception as e: - logger.error(f"failed to write records: {e}", exc_info=True) - raise WriteError(f"failed to write records: {e}") diff --git a/unstructured/ingest/connector/notion/__init__.py b/unstructured/ingest/connector/notion/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/notion/client.py b/unstructured/ingest/connector/notion/client.py deleted file mode 100644 index dfb9e8e48..000000000 --- a/unstructured/ingest/connector/notion/client.py +++ /dev/null @@ -1,233 +0,0 @@ -from typing import Any, Generator, List, Optional, Tuple - -import backoff -import httpx -import notion_client.errors -from notion_client import Client as NotionClient -from notion_client.api_endpoints import BlocksChildrenEndpoint as NotionBlocksChildrenEndpoint -from notion_client.api_endpoints import BlocksEndpoint as NotionBlocksEndpoint -from notion_client.api_endpoints import DatabasesEndpoint as NotionDatabasesEndpoint -from notion_client.api_endpoints import Endpoint -from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint -from notion_client.errors import RequestTimeoutError - -from unstructured.ingest.connector.notion.types.block import Block -from unstructured.ingest.connector.notion.types.database import Database -from unstructured.ingest.connector.notion.types.database_properties import ( - map_cells, -) -from unstructured.ingest.connector.notion.types.page import Page -from unstructured.ingest.ingest_backoff import RetryHandler -from unstructured.ingest.interfaces import RetryStrategyConfig - -retryable_exceptions = ( - httpx.TimeoutException, - httpx.HTTPStatusError, - notion_client.errors.HTTPResponseError, -) - - -def get_retry_handler(endpoint: Endpoint) -> Optional[RetryHandler]: - if retry_strategy_config := getattr(endpoint, "retry_strategy_config"): - return RetryHandler( - backoff.expo, - retryable_exceptions, - max_time=retry_strategy_config.max_retry_time, - max_tries=retry_strategy_config.max_retries, - logger=endpoint.parent.logger, - start_log_level=endpoint.parent.logger.level, - backoff_log_level=endpoint.parent.logger.level, - ) - return None - - -class BlocksChildrenEndpoint(NotionBlocksChildrenEndpoint): - def __init__( - self, - *args, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def list(self, block_id: str, **kwargs: Any) -> Tuple[List[Block], dict]: - resp: dict = ( - 
self.retry_handler(super().list, block_id=block_id, **kwargs) - if self.retry_handler - else super().list(block_id=block_id, **kwargs) - ) # type: ignore - child_blocks = [Block.from_dict(data=b) for b in resp.pop("results", [])] - return child_blocks, resp - - def iterate_list( - self, - block_id: str, - **kwargs: Any, - ) -> Generator[List[Block], None, None]: - while True: - response: dict = ( - self.retry_handler(super().list, block_id=block_id, **kwargs) - if self.retry_handler - else super().list(block_id=block_id, **kwargs) - ) # type: ignore - child_blocks = [Block.from_dict(data=b) for b in response.pop("results", [])] - yield child_blocks - - next_cursor = response.get("next_cursor") - if not response.get("has_more") or not next_cursor: - return - - -class DatabasesEndpoint(NotionDatabasesEndpoint): - def __init__( - self, - *args, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def retrieve(self, database_id: str, **kwargs: Any) -> Database: - resp: dict = ( - self.retry_handler(super().retrieve, database_id=database_id, **kwargs) - if (self.retry_handler) - else (super().retrieve(database_id=database_id, **kwargs)) - ) # type: ignore - return Database.from_dict(data=resp) - - def retrieve_status(self, database_id: str, **kwargs) -> int: - request = self.parent._build_request( - method="HEAD", - path=f"databases/{database_id}", - auth=kwargs.get("auth"), - ) - try: - response: httpx.Response = ( - self.retry_handler(self.parent.client.send, request) - if (self.retry_handler) - else (self.parent.client.send(request)) - ) # type: ignore - return response.status_code - except httpx.TimeoutException: - raise RequestTimeoutError() - - def query(self, database_id: str, **kwargs: Any) -> Tuple[List[Page], dict]: - """Get a list of [Pages](https://developers.notion.com/reference/page) contained in the database. 
- - *[🔗 Endpoint documentation](https://developers.notion.com/reference/post-database-query)* - """ # noqa: E501 - resp: dict = ( - self.retry_handler(super().query, database_id=database_id, **kwargs) - if (self.retry_handler) - else (super().query(database_id=database_id, **kwargs)) - ) # type: ignore - pages = [Page.from_dict(data=p) for p in resp.pop("results")] - for p in pages: - p.properties = map_cells(p.properties) - return pages, resp - - def iterate_query(self, database_id: str, **kwargs: Any) -> Generator[List[Page], None, None]: - while True: - response: dict = ( - self.retry_handler(super().query, database_id=database_id, **kwargs) - if (self.retry_handler) - else (super().query(database_id=database_id, **kwargs)) - ) # type: ignore - pages = [Page.from_dict(data=p) for p in response.pop("results", [])] - for p in pages: - p.properties = map_cells(p.properties) - yield pages - - next_cursor = response.get("next_cursor") - if not response.get("has_more") or not next_cursor: - return - - -class BlocksEndpoint(NotionBlocksEndpoint): - def __init__( - self, - *args: Any, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs: Any, - ) -> None: - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - self.children = BlocksChildrenEndpoint( - retry_strategy_config=retry_strategy_config, - *args, - **kwargs, - ) - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def retrieve(self, block_id: str, **kwargs: Any) -> Block: - resp: dict = ( - self.retry_handler(super().retrieve, block_id=block_id, **kwargs) - if (self.retry_handler) - else (super().retrieve(block_id=block_id, **kwargs)) - ) # type: ignore - return Block.from_dict(data=resp) - - -class PagesEndpoint(NotionPagesEndpoint): - def __init__( - self, - *args, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs, - ): - super().__init__(*args, **kwargs) - self.retry_strategy_config = retry_strategy_config - - @property - def retry_handler(self) -> Optional[RetryHandler]: - return get_retry_handler(self) - - def retrieve(self, page_id: str, **kwargs: Any) -> Page: - resp: dict = ( - self.retry_handler(super().retrieve, page_id=page_id, **kwargs) - if (self.retry_handler) - else (super().retrieve(page_id=page_id, **kwargs)) - ) # type: ignore - return Page.from_dict(data=resp) - - def retrieve_status(self, page_id: str, **kwargs) -> int: - request = self.parent._build_request( - method="HEAD", - path=f"pages/{page_id}", - auth=kwargs.get("auth"), - ) - try: - response: httpx.Response = ( - self.retry_handler(self.parent.client.send, request) - if (self.retry_handler) - else (self.parent.client.send(request)) - ) # type: ignore - return response.status_code - except httpx.TimeoutException: - raise RequestTimeoutError() - - -class Client(NotionClient): - def __init__( - self, - *args: Any, - retry_strategy_config: Optional[RetryStrategyConfig] = None, - **kwargs: Any, - ) -> None: - super().__init__(*args, **kwargs) - self.blocks = BlocksEndpoint(retry_strategy_config=retry_strategy_config, parent=self) - self.pages = PagesEndpoint(retry_strategy_config=retry_strategy_config, parent=self) - self.databases = DatabasesEndpoint(retry_strategy_config=retry_strategy_config, parent=self) diff --git a/unstructured/ingest/connector/notion/connector.py b/unstructured/ingest/connector/notion/connector.py deleted file mode 100644 index c9588cc47..000000000 --- a/unstructured/ingest/connector/notion/connector.py +++ 
/dev/null
@@ -1,468 +0,0 @@
-import typing as t
-from dataclasses import dataclass, field
-from pathlib import Path
-from uuid import UUID
-
-import httpx
-
-from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.error import SourceConnectionError
-from unstructured.ingest.interfaces import (
-    AccessConfig,
-    BaseConnectorConfig,
-    BaseSingleIngestDoc,
-    BaseSourceConnector,
-    IngestDocCleanupMixin,
-    RetryStrategyConfig,
-    SourceConnectorCleanupMixin,
-)
-from unstructured.ingest.logger import logger
-from unstructured.utils import (
-    requires_dependencies,
-)
-
-NOTION_API_VERSION = "2022-06-28"
-if t.TYPE_CHECKING:
-    from unstructured.ingest.connector.notion.client import Client as NotionClient
-
-
-@dataclass
-class NotionAccessConfig(AccessConfig):
-    notion_api_key: str = enhanced_field(sensitive=True)
-
-
-@dataclass
-class SimpleNotionConfig(BaseConnectorConfig):
-    """Connector config specifying the Notion page and database ids to process."""
-
-    access_config: NotionAccessConfig
-    page_ids: t.Optional[t.List[str]] = None
-    database_ids: t.Optional[t.List[str]] = None
-    recursive: bool = False
-
-    def __post_init__(self):
-        if self.page_ids:
-            self.page_ids = [str(UUID(p.strip())) for p in self.page_ids]
-
-        if self.database_ids:
-            self.database_ids = [str(UUID(d.strip())) for d in self.database_ids]
-
-
-@dataclass
-class NotionPageIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc):
-    """Class encapsulating fetching a doc and writing processed results (but not
-    doing the processing!).
-
-    Also includes a cleanup method. When things go wrong and the cleanup
-    method is not called, the file is left behind on the filesystem to assist debugging.
-    """
-
-    page_id: str
-    connector_config: SimpleNotionConfig
-    registry_name: str = "notion_page"
-    retry_strategy_config: t.Optional[RetryStrategyConfig] = None
-
-    def _tmp_download_file(self):
-        page_file = self.page_id + ".html"
-        return Path(self.read_config.download_dir) / page_file
-
-    @property
-    def _output_filename(self):
-        page_file = self.page_id + ".json"
-        return Path(self.processor_config.output_dir) / page_file
-
-    def _create_full_tmp_dir_path(self):
-        self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True)
-
-    @requires_dependencies(dependencies=["notion_client"], extras="notion")
-    def get_client(self):
-        from unstructured.ingest.connector.notion.client import Client as NotionClient
-
-        # Pin the version of the API to avoid schema changes
-        return NotionClient(
-            notion_version=NOTION_API_VERSION,
-            auth=self.connector_config.access_config.notion_api_key,
-            logger=logger,
-            log_level=logger.level,
-            retry_strategy_config=self.retry_strategy_config,
-        )
-
-    @BaseSingleIngestDoc.skip_if_file_exists
-    @requires_dependencies(dependencies=["notion_client"], extras="notion")
-    def get_file(self):
-        from notion_client import APIErrorCode, APIResponseError
-
-        from unstructured.ingest.connector.notion.helpers import extract_page_html
-
-        self._create_full_tmp_dir_path()
-
-        client = self.get_client()
-
-        try:
-            text_extraction = extract_page_html(
-                client=client,
-                page_id=self.page_id,
-                logger=logger,
-            )
-            self.check_exists = True
-            self.file_exists = True
-            if html := text_extraction.html:
-                with open(self._tmp_download_file(), "w") as page_file:
-                    page_file.write(html.render(pretty=True))
-
-        except APIResponseError as error:
-            if error.code == APIErrorCode.ObjectNotFound:
-                self.check_exists = True
-                self.file_exists = False
-            else:
-                logger.error(f"Error: {error}")
-
-
@requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file_metadata(self): - from notion_client import APIErrorCode, APIResponseError - - client = self.get_client() - - # The Notion block endpoint gives more hierarchical information (parent,child relationships) - # than the pages endpoint so choosing to use that one to get metadata about the page - try: - self.file_metadata = client.pages.retrieve(page_id=self.page_id) # type: ignore - self.check_exists = True - self.file_exists = True - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - @property - def date_created(self) -> t.Optional[str]: - """The date the document was created on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.created_time if self.file_metadata else None - - @property - def date_modified(self) -> t.Optional[str]: - """The date the document was last modified on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.last_edited_time if self.file_metadata else None - - @property - def exists(self) -> t.Optional[bool]: - """Whether the document exists on the remote source.""" - if self.check_exists: - return self.file_exists - - self.get_file_metadata() - - return self.file_exists - - @property - def filename(self): - """The filename of the file created from a notion page""" - return self._tmp_download_file() - - -@dataclass -class NotionDatabaseIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - database_id: str - connector_config: SimpleNotionConfig - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - registry_name: str = "notion_database" - - def _tmp_download_file(self): - page_file = self.database_id + ".html" - return Path(self.read_config.download_dir) / page_file - - @property - def _output_filename(self): - page_file = self.database_id + ".json" - return Path(self.processor_config.output_dir) / page_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_client(self): - from unstructured.ingest.connector.notion.client import Client as NotionClient - - # Pin the version of the api to avoid schema changes - return NotionClient( - notion_version=NOTION_API_VERSION, - auth=self.connector_config.access_config.notion_api_key, - logger=logger, - log_level=logger.level, - retry_strategy_config=self.retry_strategy_config, - ) - - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file(self): - from notion_client import APIErrorCode, APIResponseError - - from unstructured.ingest.connector.notion.helpers import extract_database_html - - self._create_full_tmp_dir_path() - - client = self.get_client() - - try: - text_extraction = extract_database_html( - client=client, - database_id=self.database_id, - logger=logger, - ) - self.check_exists = True - self.file_exists = True - if html := text_extraction.html: - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(html.render(pretty=True)) - - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - @requires_dependencies(dependencies=["notion_client"], extras="notion") - def get_file_metadata(self): - from notion_client import APIErrorCode, APIResponseError - - client = self.get_client() - - # The Notion block endpoint gives more hierarchical information (parent,child relationships) - # than the pages endpoint so choosing to use that one to get metadata about the page - try: - self.file_metadata = client.databases.retrieve( - database_id=self.database_id, - ) # type: ignore - self.check_exists = True - self.file_exists = True - except APIResponseError as error: - if error.code == APIErrorCode.ObjectNotFound: - self.check_exists = True - self.file_exists = False - else: - logger.error(f"Error: {error}") - - @property - def date_created(self) -> t.Optional[str]: - """The date the document was created on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.created_time if self.file_metadata else None - - @property - def date_modified(self) -> t.Optional[str]: - """The date the document was last modified on the source system.""" - if not hasattr(self, "file_metadata") or not self.file_metadata: - self.get_file_metadata() - - return self.file_metadata.last_edited_time if self.file_metadata else None - - @property - def exists(self) -> t.Optional[bool]: - """Whether the document exists on the remote source.""" - if self.check_exists: - return self.file_exists - - self.get_file_metadata() - - return self.file_exists - - @property - def filename(self): - """The filename of the file created from a notion page""" - return self._tmp_download_file() - - -@dataclass -class 
NotionSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
-    """Objects of this class support fetching document(s) from Notion."""
-
-    connector_config: SimpleNotionConfig
-    retry_strategy_config: t.Optional[RetryStrategyConfig] = None
-    _client: t.Optional["NotionClient"] = field(init=False, default=None)
-
-    @property
-    def client(self) -> "NotionClient":
-        if self._client is None:
-            self._client = self.create_client()
-        return self._client
-
-    @requires_dependencies(dependencies=["notion_client"], extras="notion")
-    def create_client(self) -> "NotionClient":
-        from unstructured.ingest.connector.notion.client import Client as NotionClient
-
-        return NotionClient(
-            notion_version=NOTION_API_VERSION,
-            auth=self.connector_config.access_config.notion_api_key,
-            logger=logger,
-            log_level=logger.level,
-            retry_strategy_config=self.retry_strategy_config,
-        )
-
-    def check_connection(self):
-        try:
-            request = self.client._build_request("HEAD", "users")
-            response = self.client.client.send(request)
-            response.raise_for_status()
-        except httpx.HTTPStatusError as http_error:
-            logger.error(f"failed to validate connection: {http_error}", exc_info=True)
-            raise SourceConnectionError(f"failed to validate connection: {http_error}")
-
-    @requires_dependencies(dependencies=["notion_client"], extras="notion")
-    def initialize(self):
-        """Verify that metadata can be fetched for an object; validates connection info."""
-        _ = self.client
-
-    @requires_dependencies(dependencies=["notion_client"], extras="notion")
-    def get_child_page_content(self, page_id: str):
-        from unstructured.ingest.connector.notion.helpers import (
-            get_recursive_content_from_page,
-        )
-
-        # sanity check that the page id is valid
-        resp_code = self.client.pages.retrieve_status(page_id=page_id)
-        if resp_code != 200:
-            raise ValueError(
-                f"page associated with page id could not be found: {page_id}",
-            )
-
-        child_content = get_recursive_content_from_page(
-            client=self.client,
-            page_id=page_id,
-            logger=logger,
-        )
-        return child_content
-
-    def get_child_content(self, page_id: str):
-        from unstructured.ingest.connector.notion.helpers import (
-            get_recursive_content_from_page,
-        )
-
-        child_content = get_recursive_content_from_page(
-            client=self.client,
-            page_id=page_id,
-            logger=logger,
-        )
-        return child_content
-
-    @requires_dependencies(dependencies=["notion_client"], extras="notion")
-    def get_child_database_content(self, database_id: str):
-        from unstructured.ingest.connector.notion.helpers import (
-            get_recursive_content_from_database,
-        )
-
-        # sanity check that the database id is valid
-        resp_code = self.client.databases.retrieve_status(database_id=database_id)
-        if resp_code != 200:
-            raise ValueError(
-                f"database associated with database id could not be found: {database_id}",
-            )
-
-        child_content = get_recursive_content_from_database(
-            client=self.client,
-            database_id=database_id,
-            logger=logger,
-        )
-        return child_content
-
-    def get_ingest_docs(self):
-        docs: t.List[BaseSingleIngestDoc] = []
-        if self.connector_config.page_ids:
-            docs += [
-                NotionPageIngestDoc(
-                    connector_config=self.connector_config,
-                    processor_config=self.processor_config,
-                    retry_strategy_config=self.retry_strategy_config,
-                    read_config=self.read_config,
-                    page_id=page_id,
-                )
-                for page_id in self.connector_config.page_ids
-            ]
-        if self.connector_config.database_ids:
-            docs += [
-                NotionDatabaseIngestDoc(
-                    connector_config=self.connector_config,
-                    processor_config=self.processor_config,
-                    retry_strategy_config=self.retry_strategy_config,
- read_config=self.read_config, - database_id=database_id, - ) - for database_id in self.connector_config.database_ids - ] - if self.connector_config.recursive: - logger.info("Getting recursive content") - child_pages = [] - child_databases = [] - if self.connector_config.page_ids: - for page_id in self.connector_config.page_ids: - child_content = self.get_child_page_content(page_id=page_id) - child_pages.extend(child_content.child_pages) - child_databases.extend(child_content.child_databases) - - if self.connector_config.database_ids: - for database_id in self.connector_config.database_ids: - child_content = self.get_child_database_content(database_id=database_id) - child_pages.extend(child_content.child_pages) - child_databases.extend(child_content.child_databases) - - # Remove duplicates - child_pages = list(set(child_pages)) - if self.connector_config.page_ids: - child_pages = [c for c in child_pages if c not in self.connector_config.page_ids] - - child_databases = list(set(child_databases)) - if self.connector_config.database_ids: - child_databases = [ - db for db in child_databases if db not in self.connector_config.database_ids - ] - - if child_pages: - logger.info( - "Adding the following child page ids: {}".format(", ".join(child_pages)), - ) - docs += [ - NotionPageIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - retry_strategy_config=self.retry_strategy_config, - read_config=self.read_config, - page_id=page_id, - ) - for page_id in child_pages - ] - - if child_databases: - logger.info( - "Adding the following child database ids: {}".format( - ", ".join(child_databases), - ), - ) - docs += [ - NotionDatabaseIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - retry_strategy_config=self.retry_strategy_config, - read_config=self.read_config, - database_id=database_id, - ) - for database_id in child_databases - ] - - return docs diff --git a/unstructured/ingest/connector/notion/helpers.py b/unstructured/ingest/connector/notion/helpers.py deleted file mode 100644 index a09fa083b..000000000 --- a/unstructured/ingest/connector/notion/helpers.py +++ /dev/null @@ -1,584 +0,0 @@ -import enum -import logging -from dataclasses import dataclass, field -from typing import List, Optional, Tuple -from urllib.parse import urlparse -from uuid import UUID - -from htmlBuilder.attributes import Style, Type -from htmlBuilder.tags import ( - Body, - Div, - Head, - Html, - HtmlTag, - Ol, - Table, - Td, - Th, - Title, - Tr, - Ul, -) -from notion_client.errors import APIResponseError - -import unstructured.ingest.connector.notion.types.blocks as notion_blocks -from unstructured.ingest.connector.notion.client import Client -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.block import Block -from unstructured.ingest.connector.notion.types.database import Database - - -@dataclass -class TextExtractionResponse: - text: Optional[str] = None - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -@dataclass -class HtmlExtractionResponse: - html: Optional[HtmlTag] = None - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -def extract_page_html( - client: Client, - page_id: str, - logger: logging.Logger, -) -> HtmlExtractionResponse: - page_id_uuid = UUID(page_id) - html_elements: List[Tuple[BlockBase, HtmlTag]] = [] - 
parent_block: Block = client.blocks.retrieve(block_id=page_id)  # type: ignore
-    head = None
-    if isinstance(parent_block.block, notion_blocks.ChildPage):
-        head = Head([], Title([], parent_block.block.title))
-    child_pages: List[str] = []
-    child_databases: List[str] = []
-    parents: List[Tuple[int, Block]] = [(0, parent_block)]
-    processed_block_ids = []
-    while len(parents) > 0:
-        level, parent = parents.pop(0)
-        parent_html = parent.get_html()
-        if parent_html:
-            html_elements.append((parent.block, parent_html))
-        logger.debug(f"processing block: {parent}")
-        if isinstance(parent.block, notion_blocks.ChildPage) and parent.id != str(page_id_uuid):
-            child_pages.append(parent.id)
-            continue
-        if isinstance(parent.block, notion_blocks.ChildDatabase):
-            child_databases.append(parent.id)
-            continue
-        if isinstance(parent.block, notion_blocks.Table):
-            table_response = build_table(client=client, table=parent)
-            html_elements.append((parent.block, table_response.table_html))
-            child_pages.extend(table_response.child_pages)
-            child_databases.extend(table_response.child_databases)
-            continue
-        if isinstance(parent.block, notion_blocks.ColumnList):
-            column_html = build_columned_list(client=client, column_parent=parent)
-            html_elements.append((parent.block, column_html))
-            continue
-        if isinstance(parent.block, notion_blocks.BulletedListItem):
-            bullet_list_resp = build_bulleted_list_children(
-                client=client,
-                bulleted_list_item_parent=parent,
-            )
-            if bullet_list_children := bullet_list_resp.child_list:
-                html_elements.append((parent.block, bullet_list_children))
-            continue
-        if isinstance(parent.block, notion_blocks.NumberedListItem):
-            numbered_list_resp = build_numbered_list_children(
-                client=client,
-                numbered_list_item_parent=parent,
-            )
-            if numbered_list_children := numbered_list_resp.child_list:
-                html_elements.append((parent.block, numbered_list_children))
-            continue
-        if parent.block.can_have_children() and parent.has_children:
-            children = []
-            for children_block in client.blocks.children.iterate_list(  # type: ignore
-                block_id=parent.id,
-            ):
-                children.extend(children_block)
-            if children:
-                logger.debug(f"Adding {len(children)} children from parent: {parent}")
-                for child in children:
-                    if child.id not in processed_block_ids:
-                        parents.append((level + 1, child))
-        # track processed ids (not Block objects) so the membership check above works
-        processed_block_ids.append(parent.id)
-
-    # Join consecutive list items into single <ol>/<ul> containers
-    joined_html_elements = []
-    numbered_list_items = []
-    bullet_list_items = []
-    for block, html in html_elements:
-        if isinstance(block, notion_blocks.BulletedListItem):
-            bullet_list_items.append(html)
-            continue
-        if isinstance(block, notion_blocks.NumberedListItem):
-            numbered_list_items.append(html)
-            continue
-        if len(numbered_list_items) > 0:
-            joined_html_elements.append(Ol([], numbered_list_items))
-            numbered_list_items = []
-        if len(bullet_list_items) > 0:
-            joined_html_elements.append(Ul([], bullet_list_items))
-            bullet_list_items = []
-        joined_html_elements.append(html)
-    # flush any trailing list items so they are not silently dropped
-    if numbered_list_items:
-        joined_html_elements.append(Ol([], numbered_list_items))
-    if bullet_list_items:
-        joined_html_elements.append(Ul([], bullet_list_items))
-
-    body = Body([], joined_html_elements)
-    all_elements = [body]
-    if head:
-        all_elements = [head] + all_elements
-    full_html = Html([], all_elements)
-    return HtmlExtractionResponse(
-        full_html,
-        child_pages=child_pages,
-        child_databases=child_databases,
-    )
-
-
-def extract_database_html(
-    client: Client,
-    database_id: str,
-    logger: logging.Logger,
-) -> HtmlExtractionResponse:
-    logger.debug(f"processing database id: {database_id}")
-    database: Database = client.databases.retrieve(database_id=database_id)  # type: ignore
-    property_keys = list(database.properties.keys())
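# ----------------------------------------------------------------------
# [Editorial sketch; not part of the deleted source above.] The "join
# consecutive list items" step in extract_page_html() can be stated more
# compactly with itertools.groupby: each consecutive run of bulleted or
# numbered items collapses into a single container, and trailing runs are
# flushed automatically. coalesce_runs() and its callback parameters are
# hypothetical names; make_ul/make_ol would wrap items in htmlBuilder's
# Ul/Ol tags, assuming that package is installed.
from itertools import groupby
from typing import Callable, Iterable, List, Tuple


def coalesce_runs(
    pairs: Iterable[Tuple[object, object]],     # (block, html) pairs, as above
    kind: Callable[[object], str],              # maps a block to "ul", "ol", or "other"
    make_ul: Callable[[List[object]], object],
    make_ol: Callable[[List[object]], object],
) -> List[object]:
    out: List[object] = []
    for k, run in groupby(pairs, key=lambda pair: kind(pair[0])):
        htmls = [html for _, html in run]
        if k == "ul":
            out.append(make_ul(htmls))    # one <ul> per consecutive bulleted run
        elif k == "ol":
            out.append(make_ol(htmls))    # one <ol> per consecutive numbered run
        else:
            out.extend(htmls)             # pass everything else through unchanged
    return out
# ----------------------------------------------------------------------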
property_keys = sorted(property_keys) - table_html_rows = [] - child_pages: List[str] = [] - child_databases: List[str] = [] - # Create header row - table_html_rows.append(Tr([], [Th([], k) for k in property_keys])) - - all_pages = [] - for page_chunk in client.databases.iterate_query(database_id=database_id): # type: ignore - all_pages.extend(page_chunk) - - logger.debug(f"Creating {len(all_pages)} rows") - for page in all_pages: - if is_database_url(client=client, url=page.url): - child_databases.append(page.id) - if is_page_url(client=client, url=page.url): - child_pages.append(page.id) - properties = page.properties - inner_html = [properties.get(k).get_html() for k in property_keys] # type: ignore - table_html_rows.append( - Tr( - [], - [Td([], cell) for cell in [html if html else Div([], []) for html in inner_html]], - ), - ) - - table_html = Table([], table_html_rows) - - return HtmlExtractionResponse( - html=table_html, - child_pages=child_pages, - child_databases=child_databases, - ) - - -@dataclass -class ChildExtractionResponse: - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -class QueueEntryType(enum.Enum): - DATABASE = "database" - PAGE = "page" - - -@dataclass -class QueueEntry: - type: QueueEntryType - id: UUID - - -def get_recursive_content_from_page( - client: Client, - page_id: str, - logger: logging.Logger, -) -> ChildExtractionResponse: - return get_recursive_content( - client=client, - init_entry=QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id)), - logger=logger, - ) - - -def get_recursive_content_from_database( - client: Client, - database_id: str, - logger: logging.Logger, -) -> ChildExtractionResponse: - return get_recursive_content( - client=client, - init_entry=QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)), - logger=logger, - ) - - -def get_recursive_content( - client: Client, - init_entry: QueueEntry, - logger: logging.Logger, -) -> ChildExtractionResponse: - parents: List[QueueEntry] = [init_entry] - child_pages: List[str] = [] - child_dbs: List[str] = [] - processed: List[str] = [] - while len(parents) > 0: - parent: QueueEntry = parents.pop() - processed.append(str(parent.id)) - if parent.type == QueueEntryType.PAGE: - logger.debug(f"Getting child data from page: {parent.id}") - page_children = [] - try: - for children_block in client.blocks.children.iterate_list( # type: ignore - block_id=str(parent.id), - ): - page_children.extend(children_block) - except APIResponseError as api_error: - logger.error(f"failed to get page with id {parent.id}: {api_error}") - if str(parent.id) in child_pages: - child_pages.remove(str(parent.id)) - continue - if not page_children: - continue - - # Extract child pages - child_pages_from_page = [ - c for c in page_children if isinstance(c.block, notion_blocks.ChildPage) - ] - if child_pages_from_page: - child_page_blocks: List[notion_blocks.ChildPage] = [ - p.block - for p in child_pages_from_page - if isinstance(p.block, notion_blocks.ChildPage) - ] - logger.debug( - "found child pages from parent page {}: {}".format( - parent.id, - ", ".join([block.title for block in child_page_blocks]), - ), - ) - new_pages = [p.id for p in child_pages_from_page if p.id not in processed] - new_pages = list(set(new_pages)) - child_pages.extend(new_pages) - parents.extend( - [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages], - ) - - # Extract child databases - child_dbs_from_page = [ - c for c in page_children if isinstance(c.block, 
notion_blocks.ChildDatabase) - ] - if child_dbs_from_page: - child_db_blocks: List[notion_blocks.ChildDatabase] = [ - c.block - for c in page_children - if isinstance(c.block, notion_blocks.ChildDatabase) - ] - logger.debug( - "found child database from parent page {}: {}".format( - parent.id, - ", ".join([block.title for block in child_db_blocks]), - ), - ) - new_dbs = [db.id for db in child_dbs_from_page if db.id not in processed] - new_dbs = list(set(new_dbs)) - child_dbs.extend(new_dbs) - parents.extend( - [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs], - ) - - linked_to_others: List[notion_blocks.LinkToPage] = [ - c.block for c in page_children if isinstance(c.block, notion_blocks.LinkToPage) - ] - for link in linked_to_others: - if (page_id := link.page_id) and ( - page_id not in processed and page_id not in child_pages - ): - child_pages.append(page_id) - parents.append(QueueEntry(type=QueueEntryType.PAGE, id=UUID(page_id))) - if (database_id := link.database_id) and ( - database_id not in processed and database_id not in child_dbs - ): - child_dbs.append(database_id) - parents.append( - QueueEntry(type=QueueEntryType.DATABASE, id=UUID(database_id)), - ) - - elif parent.type == QueueEntryType.DATABASE: - logger.debug(f"Getting child data from database: {parent.id}") - database_pages = [] - try: - for page_entries in client.databases.iterate_query( # type: ignore - database_id=str(parent.id), - ): - database_pages.extend(page_entries) - except APIResponseError as api_error: - logger.error(f"failed to get database with id {parent.id}: {api_error}") - if str(parent.id) in child_dbs: - child_dbs.remove(str(parent.id)) - continue - if not database_pages: - continue - - child_pages_from_db = [ - p for p in database_pages if is_page_url(client=client, url=p.url) - ] - if child_pages_from_db: - logger.debug( - "found child pages from parent database {}: {}".format( - parent.id, - ", ".join([p.url for p in child_pages_from_db]), - ), - ) - new_pages = [p.id for p in child_pages_from_db if p.id not in processed] - child_pages.extend(new_pages) - parents.extend( - [QueueEntry(type=QueueEntryType.PAGE, id=UUID(i)) for i in new_pages], - ) - - child_dbs_from_db = [ - p for p in database_pages if is_database_url(client=client, url=p.url) - ] - if child_dbs_from_db: - logger.debug( - "found child database from parent database {}: {}".format( - parent.id, - ", ".join([db.url for db in child_dbs_from_db]), - ), - ) - new_dbs = [db.id for db in child_dbs_from_db if db.id not in processed] - child_dbs.extend(new_dbs) - parents.extend( - [QueueEntry(type=QueueEntryType.DATABASE, id=UUID(i)) for i in new_dbs], - ) - - return ChildExtractionResponse( - child_pages=child_pages, - child_databases=child_dbs, - ) - - -def is_valid_uuid(uuid_str: str) -> bool: - try: - UUID(uuid_str) - return True - except Exception: - return False - - -def get_uuid_from_url(path: str) -> Optional[str]: - strings = path.split("-") - if len(strings) > 0 and is_valid_uuid(strings[-1]): - return strings[-1] - return None - - -def is_page_url(client: Client, url: str): - parsed_url = urlparse(url) - path = parsed_url.path.split("/")[-1] - if parsed_url.netloc != "www.notion.so": - return False - page_uuid = get_uuid_from_url(path=path) - if not page_uuid: - return False - check_resp = client.pages.retrieve_status(page_id=page_uuid) - return check_resp == 200 - - -def is_database_url(client: Client, url: str): - parsed_url = urlparse(url) - path = parsed_url.path.split("/")[-1] - if parsed_url.netloc != 
"www.notion.so": - return False - database_uuid = get_uuid_from_url(path=path) - if not database_uuid: - return False - check_resp = client.databases.retrieve_status(database_id=database_uuid) - return check_resp == 200 - - -@dataclass -class BuildTableResponse: - table_html: HtmlTag - child_pages: List[str] = field(default_factory=list) - child_databases: List[str] = field(default_factory=list) - - -def build_table(client: Client, table: Block) -> BuildTableResponse: - if not isinstance(table.block, notion_blocks.Table): - raise ValueError(f"block type not table: {type(table.block)}") - rows: List[notion_blocks.TableRow] = [] - child_pages: List[str] = [] - child_databases: List[str] = [] - for row_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=table.id, - ): - rows.extend( - [row.block for row in row_chunk if isinstance(row.block, notion_blocks.TableRow)], - ) - - # Extract child databases and pages - for row in rows: - for c in row.cells: - for rt in c.rich_texts: - if mention := rt.mention: - if mention.type == "page" and (page := mention.page): - child_pages.append(page.id) - if mention.type == "database" and (database := mention.database): - child_databases.append(database.id) - - header: Optional[notion_blocks.TableRow] = None - if table.block.has_column_header: - header = rows.pop(0) - table_html_rows = [] - if header: - header.is_header = True - table_html_rows.append(header.get_html()) - table_html_rows.extend([row.get_html() for row in rows]) - html_table = Table([], table_html_rows) - - return BuildTableResponse( - table_html=html_table, - child_pages=child_pages, - child_databases=child_databases, - ) - - -def build_columned_list(client: Client, column_parent: Block) -> HtmlTag: - if not isinstance(column_parent.block, notion_blocks.ColumnList): - raise ValueError(f"block type not column list: {type(column_parent.block)}") - columns: List[Block] = [] - for column_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=column_parent.id, - ): - columns.extend(column_chunk) - num_columns = len(columns) - columns_content = [] - for column in columns: - for column_content_chunk in client.blocks.children.iterate_list( # type: ignore - block_id=column.id, - ): - columns_content.append( - Div( - [Style(f"width:{100/num_columns}%; float: left")], - [content.block.get_html() for content in column_content_chunk], - ), - ) - - return Div([], columns_content) - - -@dataclass -class BulletedListResponse: - html: HtmlTag - child_list: Optional[HtmlTag] = None - - -bulleted_list_styles = ["circle", "square", "disc"] - - -def build_bulleted_list_children( - client: Client, - bulleted_list_item_parent: Block, - list_style_ind: int = 0, -) -> BulletedListResponse: - if not isinstance(bulleted_list_item_parent.block, notion_blocks.BulletedListItem): - raise ValueError( - f"block type not bulleted list item: {type(bulleted_list_item_parent.block)}", - ) - html = bulleted_list_item_parent.get_html() - if html: - html.attributes = [Style("margin-left: 10px")] - if not bulleted_list_item_parent.has_children: - return BulletedListResponse( - html=html, - ) - children = [] - for child_block in client.blocks.children.iterate_list( # type: ignore - block_id=bulleted_list_item_parent.id, - ): - children.extend(child_block) - if not children: - return BulletedListResponse( - html=bulleted_list_item_parent.get_html(), - ) - child_html = [] - for child in children: - child_resp = build_bulleted_list_children( - client=client, - bulleted_list_item_parent=child, - 
list_style_ind=(list_style_ind + 1) % len(bulleted_list_styles), - ) - child_html.append(child_resp.html) - if child_children := child_resp.child_list: - child_html.append(child_children) - - return BulletedListResponse( - html=html, - child_list=Ul( - [Style(f"list-style-type: {bulleted_list_styles[list_style_ind]}")], - child_html, - ), - ) - - -@dataclass -class NumberedListResponse: - html: HtmlTag - child_list: Optional[HtmlTag] = None - - -numbered_list_types = ["a", "i", "1"] - - -def build_numbered_list_children( - client: Client, - numbered_list_item_parent: Block, - type_attr_ind=0, -) -> NumberedListResponse: - if not isinstance(numbered_list_item_parent.block, notion_blocks.NumberedListItem): - raise ValueError( - f"block type not numbered list item: {type(numbered_list_item_parent.block)}", - ) - html = numbered_list_item_parent.get_html() - if html: - html.attributes = [Style("margin-left: 10px")] - if not numbered_list_item_parent.has_children: - return NumberedListResponse( - html=html, - ) - children = [] - for child_block in client.blocks.children.iterate_list( # type: ignore - block_id=numbered_list_item_parent.id, - ): - children.extend(child_block) - if not children: - return NumberedListResponse( - html=numbered_list_item_parent.get_html(), - ) - child_html = [] - for child in children: - child_resp = build_numbered_list_children( - client=client, - numbered_list_item_parent=child, - type_attr_ind=(type_attr_ind + 1) % len(numbered_list_types), - ) - child_html.append(child_resp.html) - if child_children := child_resp.child_list: - child_html.append(child_children) - - return NumberedListResponse( - html=html, - child_list=Ol([Type(numbered_list_types[type_attr_ind])], child_html), - ) diff --git a/unstructured/ingest/connector/notion/interfaces.py b/unstructured/ingest/connector/notion/interfaces.py deleted file mode 100644 index bcfa788d5..000000000 --- a/unstructured/ingest/connector/notion/interfaces.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional - -from htmlBuilder.tags import HtmlTag - - -class FromJSONMixin(ABC): - @classmethod - @abstractmethod - def from_dict(cls, data: dict): - pass - - -class GetHTMLMixin(ABC): - @abstractmethod - def get_html(self) -> Optional[HtmlTag]: - pass - - -class BlockBase(FromJSONMixin, GetHTMLMixin): - @staticmethod - @abstractmethod - def can_have_children() -> bool: - pass - - -class DBPropertyBase(FromJSONMixin): - pass - - -class DBCellBase(FromJSONMixin, GetHTMLMixin): - pass diff --git a/unstructured/ingest/connector/notion/types/__init__.py b/unstructured/ingest/connector/notion/types/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/connector/notion/types/block.py b/unstructured/ingest/connector/notion/types/block.py deleted file mode 100644 index 7159816d9..000000000 --- a/unstructured/ingest/connector/notion/types/block.py +++ /dev/null @@ -1,95 +0,0 @@ -# https://developers.notion.com/reference/page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types import blocks -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.user import PartialUser - -block_type_mapping = { - "bookmark": blocks.Bookmark, - "breadcrumb": blocks.Breadcrumb, - 
"bulleted_list_item": blocks.BulletedListItem, - "callout": blocks.Callout, - "child_database": blocks.ChildDatabase, - "child_page": blocks.ChildPage, - "code": blocks.Code, - "column": blocks.Column, - "column_list": blocks.ColumnList, - "divider": blocks.Divider, - "heading_1": blocks.Heading, - "heading_2": blocks.Heading, - "heading_3": blocks.Heading, - "embed": blocks.Embed, - "equation": blocks.Equation, - "file": blocks.File, - "image": blocks.Image, - "link_preview": blocks.LinkPreview, - "link_to_page": blocks.LinkToPage, - "numbered_list_item": blocks.NumberedListItem, - "paragraph": blocks.Paragraph, - "pdf": blocks.PDF, - "quote": blocks.Quote, - "synced_block": blocks.SyncBlock, - "table": blocks.Table, - "table_of_contents": blocks.TableOfContents, - "table_row": blocks.TableRow, - "template": blocks.Template, - "to_do": blocks.ToDo, - "toggle": blocks.Toggle, - "unsupported": blocks.Unsupported, - "video": blocks.Video, -} - - -@dataclass -class Block(FromJSONMixin, GetHTMLMixin): - id: str - type: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - has_children: bool - parent: Parent - block: BlockBase - object: str = "block" - request_id: Optional[str] = None - - def __repr__(self): - return f"{self.__class__.__name__}(id={self.id}, type={self.type})" - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - block_data = data.pop(t) - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - parent = data.pop("parent") - try: - block = cls( - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - parent=Parent.from_dict(parent), - block=block_type_mapping[t].from_dict(block_data), # type: ignore - **data, - ) - except KeyError as ke: - raise KeyError(f"failed to map to associated block type -> {t}: {block_data}") from ke - except TypeError as te: - raise TypeError(f"failed to map to associated block type -> {t}: {block_data}") from te - - return block - - def get_html(self) -> Optional[HtmlTag]: - if self.block: - return self.block.get_html() - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/__init__.py b/unstructured/ingest/connector/notion/types/blocks/__init__.py deleted file mode 100644 index 5cd158bc8..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -from .bookmark import Bookmark -from .breadcrumb import Breadcrumb -from .bulleted_list_item import BulletedListItem -from .callout import Callout -from .child_database import ChildDatabase -from .child_page import ChildPage -from .code import Code -from .column_list import Column, ColumnList -from .divider import Divider -from .embed import Embed -from .equation import Equation -from .file import File -from .heading import Heading -from .image import Image -from .link_preview import LinkPreview -from .link_to_page import LinkToPage -from .numbered_list import NumberedListItem -from .paragraph import Paragraph -from .pdf import PDF -from .quote import Quote -from .synced_block import DuplicateSyncedBlock, OriginalSyncedBlock, SyncBlock -from .table import Table, TableRow -from .table_of_contents import TableOfContents -from .template import Template -from .todo import ToDo -from .toggle import Toggle -from .unsupported import Unsupported -from .video import Video - -__all__ = [ - "Bookmark", - "Breadcrumb", - "BulletedListItem", - "Callout", - "ChildDatabase", - "ChildPage", - 
"Code", - "Column", - "ColumnList", - "Divider", - "Embed", - "Equation", - "File", - "Heading", - "Image", - "LinkPreview", - "LinkToPage", - "NumberedListItem", - "Paragraph", - "PDF", - "Quote", - "SyncBlock", - "OriginalSyncedBlock", - "DuplicateSyncedBlock", - "Table", - "TableRow", - "TableOfContents", - "Template", - "ToDo", - "Toggle", - "Unsupported", - "Video", -] diff --git a/unstructured/ingest/connector/notion/types/blocks/bookmark.py b/unstructured/ingest/connector/notion/types/blocks/bookmark.py deleted file mode 100644 index 46804475f..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/bookmark.py +++ /dev/null @@ -1,40 +0,0 @@ -# https://developers.notion.com/reference/block#bookmark -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Bookmark(BlockBase): - url: str - caption: List[RichText] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - captions = data.pop("caption", []) - return cls( - url=data["url"], - caption=[RichText.from_dict(c) for c in captions], - ) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.url: - texts.append(A([Href(self.url)], self.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) - - @staticmethod - def can_have_children() -> bool: - return False diff --git a/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py b/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py deleted file mode 100644 index d6b1626a2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/breadcrumb.py +++ /dev/null @@ -1,21 +0,0 @@ -# https://developers.notion.com/reference/block#breadcrumb -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Breadcrumb(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - pass diff --git a/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py b/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py deleted file mode 100644 index 5db911dd2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/bulleted_list_item.py +++ /dev/null @@ -1,31 +0,0 @@ -# https://developers.notion.com/reference/block#bulleted-list-item -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Li - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class BulletedListItem(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - return cls( - color=data["color"], - children=data.get("children", []), - 
rich_text=[RichText.from_dict(rt) for rt in rich_text], - ) - - def get_html(self) -> Optional[HtmlTag]: - return Li([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/callout.py b/unstructured/ingest/connector/notion/types/blocks/callout.py deleted file mode 100644 index 6ea2bb130..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/callout.py +++ /dev/null @@ -1,94 +0,0 @@ -# https://developers.notion.com/reference/block#callout -from dataclasses import dataclass, field -from typing import List, Optional, Union - -from htmlBuilder.attributes import Href, Style -from htmlBuilder.tags import A, Div, HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class EmojiIcon(FromJSONMixin, GetHTMLMixin): - emoji: str - type: str = "emoji" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return P([], self.emoji) - - -@dataclass -class ExternalIconContent(FromJSONMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class ExternalIcon(FromJSONMixin, GetHTMLMixin): - external: ExternalIconContent - type: str = "external" - - @classmethod - def from_dict(cls, data: dict): - return cls(external=ExternalIconContent.from_dict(data=data.pop("external")), **data) - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return A([Href(self.external.url)], [self.external.url]) - else: - return None - - -class Icon(FromJSONMixin): - @classmethod - def from_dict(cls, data: dict) -> Union[EmojiIcon, ExternalIcon]: - t = data.get("type") - if t == "emoji": - return EmojiIcon.from_dict(data) - elif t == "external": - return ExternalIcon.from_dict(data) - else: - raise ValueError(f"Unexpected icon type: {t} ({data})") - - -@dataclass -class Callout(BlockBase): - color: str - icon: Optional[Union[EmojiIcon, ExternalIcon]] = None - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - return cls( - color=data["color"], - icon=Icon.from_dict(data.pop("icon")), - rich_text=[RichText.from_dict(rt) for rt in rich_text], - ) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if self.icon and self.icon.get_html(): - elements.append(self.icon.get_html()) - if self.rich_text: - elements.extend([rt.get_html() for rt in self.rich_text]) - attributes = [] - if self.color: - attributes.append(Style(f"color:{self.color}")) - return Div(attributes, elements) diff --git a/unstructured/ingest/connector/notion/types/blocks/child_database.py b/unstructured/ingest/connector/notion/types/blocks/child_database.py deleted file mode 100644 index 578b400f2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/child_database.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#child-database -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class ChildDatabase(BlockBase): - title: str - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) 
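# ----------------------------------------------------------------------
# [Editorial sketch; not part of the deleted source above.] Icon.from_dict()
# above is the same type-tag dispatch used throughout this package
# (block_type_mapping, db_prop_type_mapping): look the payload's "type"
# field up in a registry and delegate construction to the matching class.
# A minimal standalone form; Emoji, ExternalRef, and ICON_REGISTRY are
# hypothetical names invented for this sketch:
from dataclasses import dataclass


@dataclass
class Emoji:
    emoji: str


@dataclass
class ExternalRef:
    url: str


ICON_REGISTRY = {
    "emoji": lambda d: Emoji(emoji=d["emoji"]),
    "external": lambda d: ExternalRef(url=d["external"]["url"]),
}


def icon_from_dict(data: dict):
    t = data.get("type")
    try:
        factory = ICON_REGISTRY[t]
    except KeyError as ke:
        # mirror the package's convention of re-raising with the offending payload
        raise ValueError(f"Unexpected icon type: {t} ({data})") from ke
    return factory(data)


# icon_from_dict({"type": "emoji", "emoji": "📌"}) -> Emoji(emoji="📌")
# ----------------------------------------------------------------------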
- - def get_html(self) -> Optional[HtmlTag]: - return P([], self.title) diff --git a/unstructured/ingest/connector/notion/types/blocks/child_page.py b/unstructured/ingest/connector/notion/types/blocks/child_page.py deleted file mode 100644 index 6ee6f9047..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/child_page.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#child-page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag, P - -from unstructured.ingest.connector.notion.interfaces import BlockBase, GetHTMLMixin - - -@dataclass -class ChildPage(BlockBase, GetHTMLMixin): - title: str - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return P([], self.title) diff --git a/unstructured/ingest/connector/notion/types/blocks/code.py b/unstructured/ingest/connector/notion/types/blocks/code.py deleted file mode 100644 index 3a6d80e36..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/code.py +++ /dev/null @@ -1,43 +0,0 @@ -# https://developers.notion.com/reference/block#code -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Br, Div, HtmlTag -from htmlBuilder.tags import Code as HtmlCode - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Code(BlockBase): - language: str - rich_text: List[RichText] = field(default_factory=list) - caption: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - caption = data.pop("caption", []) - return cls( - language=data["language"], - rich_text=[RichText.from_dict(rt) for rt in rich_text], - caption=[RichText.from_dict(c) for c in caption], - ) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.rich_text: - texts.append(HtmlCode([], [rt.get_html() for rt in self.rich_text])) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/column_list.py b/unstructured/ingest/connector/notion/types/blocks/column_list.py deleted file mode 100644 index d2df367c2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/column_list.py +++ /dev/null @@ -1,35 +0,0 @@ -# https://developers.notion.com/reference/block#column-list-and-column -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class ColumnList(BlockBase): - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return None - - -@dataclass -class Column(BlockBase): - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/divider.py 
b/unstructured/ingest/connector/notion/types/blocks/divider.py deleted file mode 100644 index 33fc01e7b..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/divider.py +++ /dev/null @@ -1,22 +0,0 @@ -# https://developers.notion.com/reference/block#divider -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Hr, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Divider(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return Hr([Style("border-top: 3px solid #bbb")]) diff --git a/unstructured/ingest/connector/notion/types/blocks/embed.py b/unstructured/ingest/connector/notion/types/blocks/embed.py deleted file mode 100644 index 561fe828a..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/embed.py +++ /dev/null @@ -1,36 +0,0 @@ -# https://developers.notion.com/reference/block#embed -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Embed(BlockBase): - url: str - caption: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(caption=[RichText.from_dict(d) for d in data.pop("caption", [])], **data) - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.url: - texts.append(A([Href(self.url)], self.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/equation.py b/unstructured/ingest/connector/notion/types/blocks/equation.py deleted file mode 100644 index ccab3d04d..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/equation.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#equation -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Equation(BlockBase): - expression: str - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.expression) diff --git a/unstructured/ingest/connector/notion/types/blocks/file.py b/unstructured/ingest/connector/notion/types/blocks/file.py deleted file mode 100644 index ad7fe54be..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/file.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/block#file -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import External -from 
unstructured.ingest.connector.notion.types.file import File as FileContent -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class File(BlockBase): - type: str - external: Optional[External] = None - file: Optional[FileContent] = None - caption: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - caption = [RichText.from_dict(rt) for rt in data.pop("caption", [])] - t = data["type"] - file = cls(type=t, caption=caption) - if t == "external": - file.external = External.from_dict(data["external"]) - elif t == "file": - file.file = FileContent.from_dict(data["file"]) - return file - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.file: - texts.append(A([Href(self.file.url)], self.file.url)) - if self.external: - texts.append(A([Href(self.external.url)], self.external.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/heading.py b/unstructured/ingest/connector/notion/types/blocks/heading.py deleted file mode 100644 index 86983f585..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/heading.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/block#headings -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Heading(BlockBase): - color: str - is_toggleable: bool - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - heading = cls(**data) - heading.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return heading - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - texts = [rt.get_html() for rt in self.rich_text] - attributes = [] - if self.color and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, texts) diff --git a/unstructured/ingest/connector/notion/types/blocks/image.py b/unstructured/ingest/connector/notion/types/blocks/image.py deleted file mode 100644 index d9c5203c4..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/image.py +++ /dev/null @@ -1,21 +0,0 @@ -# https://developers.notion.com/reference/block#image -from typing import Optional - -from htmlBuilder.attributes import Src -from htmlBuilder.tags import HtmlTag, Img - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import FileObject - - -class Image(BlockBase, FileObject): - @staticmethod - def can_have_children() -> bool: - return False - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return Img([Src(self.external.url)], []) - if self.file: - return Img([Src(self.file.url)], []) - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/link_preview.py b/unstructured/ingest/connector/notion/types/blocks/link_preview.py deleted file mode 100644 
index 913df1f72..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/link_preview.py +++ /dev/null @@ -1,24 +0,0 @@ -# https://developers.notion.com/reference/block#link-preview -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class LinkPreview(BlockBase): - url: str - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return A([Href(self.url)], self.url) diff --git a/unstructured/ingest/connector/notion/types/blocks/link_to_page.py b/unstructured/ingest/connector/notion/types/blocks/link_to_page.py deleted file mode 100644 index ed9156d26..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/link_to_page.py +++ /dev/null @@ -1,29 +0,0 @@ -# https://developers.notion.com/reference/block#link-to-page -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class LinkToPage(BlockBase): - type: str - page_id: Optional[str] = None - database_id: Optional[str] = None - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if page_id := self.page_id: - return Div([], page_id) - if database_id := self.database_id: - return Div([], database_id) - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/numbered_list.py b/unstructured/ingest/connector/notion/types/blocks/numbered_list.py deleted file mode 100644 index b0051bc80..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/numbered_list.py +++ /dev/null @@ -1,29 +0,0 @@ -# https://developers.notion.com/reference/block#numbered-list-item -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Li - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class NumberedListItem(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - numbered_list = cls(**data) - numbered_list.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return numbered_list - - def get_html(self) -> Optional[HtmlTag]: - return Li([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/paragraph.py b/unstructured/ingest/connector/notion/types/blocks/paragraph.py deleted file mode 100644 index bc31e4cba..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/paragraph.py +++ /dev/null @@ -1,31 +0,0 @@ -# https://developers.notion.com/reference/block#paragraph -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class 
Paragraph(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - paragraph = cls(**data) - paragraph.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return paragraph - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return Br() - return Div([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/pdf.py b/unstructured/ingest/connector/notion/types/blocks/pdf.py deleted file mode 100644 index 61ef3a820..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/pdf.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/block#pdf -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Br, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import External, File -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class PDF(BlockBase): - type: str - caption: List[RichText] = field(default_factory=list) - external: Optional[External] = None - file: Optional[File] = None - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - caption = data.pop("caption", []) - t = data["type"] - paragraph = cls(type=t) - paragraph.caption = [RichText.from_dict(c) for c in caption] - if t == "external": - paragraph.external = External.from_dict(data["external"]) - elif t == "file": - paragraph.file = File.from_dict(data["file"]) - return paragraph - - def get_html(self) -> Optional[HtmlTag]: - texts = [] - if self.external: - texts.append(A([Href(self.external.url)], self.external.url)) - if self.file: - texts.append(A([Href(self.file.url)], self.file.url)) - if self.caption: - texts.append(Div([], [rt.get_html() for rt in self.caption])) - if not texts: - return None - joined = [Br()] * (len(texts) * 2 - 1) - joined[0::2] = texts - - return Div([], joined) diff --git a/unstructured/ingest/connector/notion/types/blocks/quote.py b/unstructured/ingest/connector/notion/types/blocks/quote.py deleted file mode 100644 index 1469f1d2a..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/quote.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/block#quote -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Quote(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - quote = cls(**data) - quote.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return quote - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - texts = [rt.get_html() for rt in self.rich_text] - attributes = [] - if self.color 
and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, texts) diff --git a/unstructured/ingest/connector/notion/types/blocks/synced_block.py b/unstructured/ingest/connector/notion/types/blocks/synced_block.py deleted file mode 100644 index b4cd2da10..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/synced_block.py +++ /dev/null @@ -1,57 +0,0 @@ -# https://developers.notion.com/reference/block#synced-block -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class OriginalSyncedBlock(BlockBase): - synced_from: Optional[str] = None - children: List[dict] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(children=data["children"]) - - def get_html(self) -> Optional[HtmlTag]: - return None - - -@dataclass -class DuplicateSyncedBlock(BlockBase): - type: str - block_id: str - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return None - - -class SyncBlock(BlockBase): - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - if "synced_from" in data: - return OriginalSyncedBlock.from_dict(data) - else: - return DuplicateSyncedBlock.from_dict(data) - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/table.py b/unstructured/ingest/connector/notion/types/blocks/table.py deleted file mode 100644 index 785827563..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/table.py +++ /dev/null @@ -1,63 +0,0 @@ -# https://developers.notion.com/reference/block#table -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import HtmlTag, Td, Th, Tr - -from unstructured.ingest.connector.notion.interfaces import ( - BlockBase, - FromJSONMixin, -) -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Table(BlockBase): - table_width: int - has_column_header: bool - has_row_header: bool - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return None - - -@dataclass -class TableCell(FromJSONMixin): - rich_texts: List[RichText] - - @classmethod - def from_dict(cls, data: dict): - return cls(rich_texts=[RichText.from_dict(rt) for rt in data.pop("rich_texts", [])]) - - def get_html(self, is_header: bool) -> Optional[HtmlTag]: - if is_header: - return Th([], [rt.get_html() for rt in self.rich_texts]) - else: - return Td([], [rt.get_html() for rt in self.rich_texts]) - - -# https://developers.notion.com/reference/block#table-rows -@dataclass -class TableRow(BlockBase): - is_header: bool = False - cells: List[TableCell] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - cells = data.get("cells", []) - return cls(cells=[TableCell.from_dict({"rich_texts": c}) for c in cells]) - - @staticmethod - def can_have_children() -> bool: - return False - - def get_html(self) -> Optional[HtmlTag]: - return Tr([], [cell.get_html(is_header=self.is_header) for cell in self.cells]) diff 
--git a/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py b/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py deleted file mode 100644 index f753f6074..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/table_of_contents.py +++ /dev/null @@ -1,23 +0,0 @@ -# https://developers.notion.com/reference/block#table-of-contents -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class TableOfContents(BlockBase): - color: str - - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/template.py b/unstructured/ingest/connector/notion/types/blocks/template.py deleted file mode 100644 index 45056876f..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/template.py +++ /dev/null @@ -1,30 +0,0 @@ -# https://developers.notion.com/reference/block#template -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Template(BlockBase): - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - template = cls(**data) - template.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return template - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - return Div([], [rt.get_html() for rt in self.rich_text]) diff --git a/unstructured/ingest/connector/notion/types/blocks/todo.py b/unstructured/ingest/connector/notion/types/blocks/todo.py deleted file mode 100644 index 3e03b2ce0..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/todo.py +++ /dev/null @@ -1,42 +0,0 @@ -# https://developers.notion.com/reference/block#to-do -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Checked, Style, Type -from htmlBuilder.tags import Div, HtmlTag, Input - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class ToDo(BlockBase): - color: str - checked: bool = False - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - todo = cls(**data) - todo.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return todo - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - elements = [] - check_input_attributes = [Type("checkbox")] - if self.checked: - check_input_attributes.append(Checked("")) - elements.append(Input(check_input_attributes)) - elements.extend([rt.get_html() for rt in self.rich_text]) - attributes = [] - if self.color and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, elements) diff --git 
a/unstructured/ingest/connector/notion/types/blocks/toggle.py b/unstructured/ingest/connector/notion/types/blocks/toggle.py deleted file mode 100644 index 8619eb7de..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/toggle.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/block#toggle-blocks -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Toggle(BlockBase): - color: str - children: List[dict] = field(default_factory=list) - rich_text: List[RichText] = field(default_factory=list) - - @staticmethod - def can_have_children() -> bool: - return True - - @classmethod - def from_dict(cls, data: dict): - rich_text = data.pop("rich_text", []) - toggle = cls(**data) - toggle.rich_text = [RichText.from_dict(rt) for rt in rich_text] - return toggle - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - - texts = [rt.get_html() for rt in self.rich_text] - attributes = [] - if self.color and self.color != "default": - attributes.append(Style(f"color: {self.color}")) - return Div(attributes, texts) diff --git a/unstructured/ingest/connector/notion/types/blocks/unsupported.py b/unstructured/ingest/connector/notion/types/blocks/unsupported.py deleted file mode 100644 index 6e28b8cf2..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/unsupported.py +++ /dev/null @@ -1,20 +0,0 @@ -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import BlockBase - - -@dataclass -class Unsupported(BlockBase): - @staticmethod - def can_have_children() -> bool: - return False - - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_html(self) -> Optional[HtmlTag]: - return None diff --git a/unstructured/ingest/connector/notion/types/blocks/video.py b/unstructured/ingest/connector/notion/types/blocks/video.py deleted file mode 100644 index 2523adf70..000000000 --- a/unstructured/ingest/connector/notion/types/blocks/video.py +++ /dev/null @@ -1,22 +0,0 @@ -# https://developers.notion.com/reference/block#image -from typing import Optional - -from htmlBuilder.attributes import Src -from htmlBuilder.tags import HtmlTag, Source -from htmlBuilder.tags import Video as VideoHtml - -from unstructured.ingest.connector.notion.interfaces import BlockBase -from unstructured.ingest.connector.notion.types.file import FileObject - - -class Video(BlockBase, FileObject): - @staticmethod - def can_have_children() -> bool: - return False - - def get_html(self) -> Optional[HtmlTag]: - if self.external: - return VideoHtml([], [Source([Src(self.external.url)], [self.external.url])]) - if self.file: - return VideoHtml([], [Source([Src(self.file.url)], [self.file.url])]) - return None diff --git a/unstructured/ingest/connector/notion/types/database.py b/unstructured/ingest/connector/notion/types/database.py deleted file mode 100644 index db5718cf3..000000000 --- a/unstructured/ingest/connector/notion/types/database.py +++ /dev/null @@ -1,72 +0,0 @@ -# https://developers.notion.com/reference/database -from dataclasses import dataclass, field -from typing import Dict, List, Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from 
unstructured.ingest.connector.notion.interfaces import ( - DBPropertyBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.database_properties import ( - map_properties, -) -from unstructured.ingest.connector.notion.types.file import FileObject -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.rich_text import RichText -from unstructured.ingest.connector.notion.types.user import PartialUser - - -@dataclass -class Database(FromJSONMixin, GetHTMLMixin): - id: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - parent: Parent - url: str - is_inline: bool - public_url: str - request_id: Optional[str] = None - properties: Dict[str, DBPropertyBase] = field(default_factory=dict) - title: List[RichText] = field(default_factory=list) - description: List[RichText] = field(default_factory=list) - icon: Optional[FileObject] = None - cover: Optional[FileObject] = None - object: str = "database" - - @classmethod - def from_dict(cls, data: dict): - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - icon = data.pop("icon") - cover = data.pop("cover") - parent = data.pop("parent") - title = data.pop("title") - description = data.pop("description") - page = cls( - properties=map_properties(data.pop("properties", {})), - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - icon=FileObject.from_dict(icon) if icon else None, - cover=FileObject.from_dict(cover) if cover else None, - parent=Parent.from_dict(parent), - title=[RichText.from_dict(data=r) for r in title], - description=[RichText.from_dict(data=r) for r in description], - **data, - ) - - return page - - def get_html(self) -> Optional[HtmlTag]: - spans = [] - if title := self.title: - spans.append(Span([], [rt.get_html() for rt in title])) - if description := self.description: - spans.append(Span([], [rt.get_html() for rt in description])) - if spans: - return Div([], spans) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/__init__.py b/unstructured/ingest/connector/notion/types/database_properties/__init__.py deleted file mode 100644 index 100111365..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/__init__.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Dict - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - -from .checkbox import Checkbox, CheckboxCell -from .created_by import CreatedBy, CreatedByCell -from .created_time import CreatedTime, CreatedTimeCell -from .date import Date, DateCell -from .email import Email, EmailCell -from .files import Files, FilesCell -from .formula import Formula, FormulaCell -from .last_edited_by import LastEditedBy, LastEditedByCell -from .last_edited_time import LastEditedTime, LastEditedTimeCell -from .multiselect import MultiSelect, MultiSelectCell -from .number import Number, NumberCell -from .people import People, PeopleCell -from .phone_number import PhoneNumber, PhoneNumberCell -from .relation import Relation, RelationCell -from .rich_text import RichText, RichTextCell -from .rollup import Rollup, RollupCell -from .select import Select, SelectCell -from .status import Status, StatusCell -from .title import Title, TitleCell -from .unique_id import UniqueID, UniqueIDCell -from .url import URL, URLCell -from .verification import 
Verification, VerificationCell - -db_prop_type_mapping = { - "checkbox": Checkbox, - "created_by": CreatedBy, - "created_time": CreatedTime, - "date": Date, - "email": Email, - "files": Files, - "formula": Formula, - "last_edited_by": LastEditedBy, - "last_edited_time": LastEditedTime, - "multi_select": MultiSelect, - "number": Number, - "people": People, - "phone_number": PhoneNumber, - "relation": Relation, - "rich_text": RichText, - "rollup": Rollup, - "select": Select, - "status": Status, - "title": Title, - "unique_id": UniqueID, - "url": URL, - "verification": Verification, -} - - -def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]: - mapped_dict = {} - for k, v in props.items(): - try: - mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore - except KeyError as ke: - raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke - - return mapped_dict - - -db_cell_type_mapping = { - "checkbox": CheckboxCell, - "created_by": CreatedByCell, - "created_time": CreatedTimeCell, - "date": DateCell, - "email": EmailCell, - "files": FilesCell, - "formula": FormulaCell, - "last_edited_by": LastEditedByCell, - "last_edited_time": LastEditedTimeCell, - "multi_select": MultiSelectCell, - "number": NumberCell, - "people": PeopleCell, - "phone_number": PhoneNumberCell, - "relation": RelationCell, - "rich_text": RichTextCell, - "rollup": RollupCell, - "select": SelectCell, - "status": StatusCell, - "title": TitleCell, - "unique_id": UniqueIDCell, - "url": URLCell, - "verification": VerificationCell, -} - - -def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]: - mapped_dict = {} - for k, v in props.items(): - try: - t = v["type"] - mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore - except KeyError as ke: - raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke - - return mapped_dict - - -__all__ = [ - "map_properties", - "map_cells", -] diff --git a/unstructured/ingest/connector/notion/types/database_properties/checkbox.py b/unstructured/ingest/connector/notion/types/database_properties/checkbox.py deleted file mode 100644 index b60d187a1..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/checkbox.py +++ /dev/null @@ -1,38 +0,0 @@ -# https://developers.notion.com/reference/property-object#checkbox -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Checked, Type -from htmlBuilder.tags import Div, HtmlTag, Input - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class Checkbox(DBPropertyBase): - id: str - name: str - type: str = "checkbox" - checkbox: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class CheckboxCell(DBCellBase): - id: str - checkbox: bool - name: Optional[str] = None - type: str = "checkbox" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - check_input_attributes = [Type("checkbox")] - if self.checkbox: - check_input_attributes.append(Checked("")) - return Div([], Input(check_input_attributes)) diff --git a/unstructured/ingest/connector/notion/types/database_properties/created_by.py b/unstructured/ingest/connector/notion/types/database_properties/created_by.py deleted file mode 100644 index 034b0c1c4..000000000 --- 
a/unstructured/ingest/connector/notion/types/database_properties/created_by.py +++ /dev/null @@ -1,35 +0,0 @@ -# https://developers.notion.com/reference/property-object#created-by -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class CreatedBy(DBPropertyBase): - id: str - name: str - type: str = "created_by" - created_by: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class CreatedByCell(DBCellBase): - id: str - created_by: People - type: str = "created_by" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(created_by=People.from_dict(data.pop("created_by")), **data) - - def get_html(self) -> Optional[HtmlTag]: - return self.created_by.get_html() diff --git a/unstructured/ingest/connector/notion/types/database_properties/created_time.py b/unstructured/ingest/connector/notion/types/database_properties/created_time.py deleted file mode 100644 index 86c1173d6..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/created_time.py +++ /dev/null @@ -1,34 +0,0 @@ -# https://developers.notion.com/reference/property-object#created-time -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class CreatedTime(DBPropertyBase): - id: str - name: str - type: str = "created_time" - created_time: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class CreatedTimeCell(DBCellBase): - id: str - created_time: str - type: str = "created_time" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.created_time) diff --git a/unstructured/ingest/connector/notion/types/database_properties/date.py b/unstructured/ingest/connector/notion/types/database_properties/date.py deleted file mode 100644 index 779ef60cc..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/date.py +++ /dev/null @@ -1,41 +0,0 @@ -# https://developers.notion.com/reference/property-object#date -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.date import Date as DateType - - -@dataclass -class Date(DBPropertyBase): - id: str - name: str - type: str = "date" - date: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class DateCell(DBCellBase): - id: str - date: Optional[DateType] = None - name: Optional[str] = None - type: str = "date" - - @classmethod - def from_dict(cls, data: dict): - date = None - date_data = data.pop("date") - if date_data: - date = DateType.from_dict(date_data) - return cls(date=date, **data) - - def get_html(self) -> Optional[HtmlTag]: - if date := self.date: - return date.get_html() - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/email.py 
b/unstructured/ingest/connector/notion/types/database_properties/email.py deleted file mode 100644 index 1303770a8..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/email.py +++ /dev/null @@ -1,36 +0,0 @@ -# https://developers.notion.com/reference/property-object#email -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class Email(DBPropertyBase): - id: str - name: str - type: str = "email" - email: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class EmailCell(DBCellBase): - id: str - email: str - name: Optional[str] = None - type: str = "email" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if email := self.email: - return Div([], email) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/files.py b/unstructured/ingest/connector/notion/types/database_properties/files.py deleted file mode 100644 index 680ee15ba..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/files.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/property-object#files -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.file import FileObject - - -@dataclass -class Files(DBPropertyBase): - id: str - name: str - type: str = "files" - files: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class FilesCell(DBCellBase): - id: str - files: List[FileObject] - type: str = "files" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(files=[FileObject.from_dict(f) for f in data.pop("files", [])], **data) - - def get_html(self) -> Optional[HtmlTag]: - if not self.files: - return None - return Div([], [f.get_html() for f in self.files]) diff --git a/unstructured/ingest/connector/notion/types/database_properties/formula.py b/unstructured/ingest/connector/notion/types/database_properties/formula.py deleted file mode 100644 index b1921367e..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/formula.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/property-object#formula -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class FormulaProp(FromJSONMixin): - expression: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Formula(DBPropertyBase): - id: str - name: str - formula: FormulaProp - type: str = "formula" - - @classmethod - def from_dict(cls, data: dict): - return cls(formula=FormulaProp.from_dict(data.pop("formula", {})), **data) - - -@dataclass -class FormulaCell(DBCellBase): - id: str - formula: dict - type: str = "formula" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - formula = 
self.formula - t = formula.get("type") - return Div([], str(formula[t])) diff --git a/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py b/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py deleted file mode 100644 index a1a2d0a9c..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/last_edited_by.py +++ /dev/null @@ -1,34 +0,0 @@ -# https://developers.notion.com/reference/property-object#last-edited-by -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class LastEditedBy(DBPropertyBase): - @classmethod - def from_dict(cls, data: dict): - return cls() - - def get_text(self) -> Optional[str]: - return None - - -@dataclass -class LastEditedByCell(DBCellBase): - id: str - last_edited_by: People - type: str = "last_edited_by" - - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(last_edited_by=People.from_dict(data.pop("last_edited_by", {})), **data) - - def get_html(self) -> Optional[HtmlTag]: - return self.last_edited_by.get_html() diff --git a/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py b/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py deleted file mode 100644 index 4c9e00981..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/last_edited_time.py +++ /dev/null @@ -1,34 +0,0 @@ -# https://developers.notion.com/reference/property-object#last-edited-time -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class LastEditedTime(DBPropertyBase): - id: str - name: str - type: str = "last_edited_time" - last_edited_time: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class LastEditedTimeCell(DBCellBase): - id: str - last_edited_time: str - type: str = "last_edited_time" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.last_edited_time) diff --git a/unstructured/ingest/connector/notion/types/database_properties/multiselect.py b/unstructured/ingest/connector/notion/types/database_properties/multiselect.py deleted file mode 100644 index 7534ab82d..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/multiselect.py +++ /dev/null @@ -1,73 +0,0 @@ -# https://developers.notion.com/reference/property-object#multi-select -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class MultiSelectOption(FromJSONMixin): - color: str - id: str - name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class MultiSelectProp(FromJSONMixin): - options: List[MultiSelectOption] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return 
cls(options=[MultiSelectOption.from_dict(o) for o in data.get("options", [])]) - - -@dataclass -class MultiSelect(DBPropertyBase): - id: str - name: str - multi_select: MultiSelectProp - type: str = "multi_select" - - @classmethod - def from_dict(cls, data: dict): - return cls( - multi_select=MultiSelectProp.from_dict(data.pop("multi_select", {})), - **data, - ) - - -@dataclass -class MultiSelectCell(DBCellBase): - id: str - multi_select: List[MultiSelectOption] - type: str = "multi_select" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls( - multi_select=[MultiSelectOption.from_dict(o) for o in data.pop("multi_select", [])], - **data, - ) - - def get_html(self) -> Optional[HtmlTag]: - if not self.multi_select: - return None - option_spans = [] - for option in self.multi_select: - option_attributes = [] - if option.color and option.color != "default": - option_attributes.append(Style(f"color: {option.color}")) - option_spans.append(Span(option_attributes, option.name)) - return Div([], option_spans) diff --git a/unstructured/ingest/connector/notion/types/database_properties/number.py b/unstructured/ingest/connector/notion/types/database_properties/number.py deleted file mode 100644 index 599981fc0..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/number.py +++ /dev/null @@ -1,49 +0,0 @@ -# https://developers.notion.com/reference/property-object#number -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, - ) - - -@dataclass -class NumberProp(FromJSONMixin): - format: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Number(DBPropertyBase): - id: str - name: str - number: NumberProp - type: str = "number" - - @classmethod - def from_dict(cls, data: dict): - return cls(number=NumberProp.from_dict(data.pop("number")), **data) - - -@dataclass -class NumberCell(DBCellBase): - id: str - number: Optional[int] = None - type: str = "number" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if number := self.number: - return Div([], str(number)) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/people.py b/unstructured/ingest/connector/notion/types/database_properties/people.py deleted file mode 100644 index 44e66b2e8..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/people.py +++ /dev/null @@ -1,40 +0,0 @@ -# https://developers.notion.com/reference/property-object#people -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.user import People as PeopleType - - -@dataclass -class People(DBPropertyBase): - id: str - name: str - type: str = "people" - people: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class PeopleCell(DBCellBase): - id: str - people: List[PeopleType] - type: str = "people" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(people=[PeopleType.from_dict(p) for p in data.pop("people", [])], **data) - - def get_html(self) -> 
Optional[HtmlTag]: - if not self.people: - return None - people_spans = [] - for person in self.people: - people_spans.append(Span([], person.get_html())) - return Div([], people_spans) diff --git a/unstructured/ingest/connector/notion/types/database_properties/phone_number.py b/unstructured/ingest/connector/notion/types/database_properties/phone_number.py deleted file mode 100644 index 58a5c9170..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/phone_number.py +++ /dev/null @@ -1,36 +0,0 @@ -# https://developers.notion.com/reference/property-object#phone-number -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class PhoneNumber(DBPropertyBase): - id: str - name: str - type: str = "phone_number" - phone_number: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class PhoneNumberCell(DBCellBase): - id: str - phone_number: Optional[str] - name: Optional[str] = None - type: str = "phone_number" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if phone_number := self.phone_number: - return Div([], phone_number) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/relation.py b/unstructured/ingest/connector/notion/types/database_properties/relation.py deleted file mode 100644 index 35c283a11..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/relation.py +++ /dev/null @@ -1,67 +0,0 @@ -# https://developers.notion.com/reference/property-object#relation -from dataclasses import dataclass -from typing import Optional -from urllib.parse import unquote - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class DualProperty(FromJSONMixin): - synced_property_id: str - synced_property_name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RelationProp(FromJSONMixin): - database_id: str - type: str - dual_property: DualProperty - - @classmethod - def from_dict(cls, data: dict): - t = data.get("type") - if t == "dual_property": - dual_property = DualProperty.from_dict(data.pop(t)) - else: - raise ValueError(f"{t} type not recognized") - - return cls(dual_property=dual_property, **data) - - -@dataclass -class Relation(DBPropertyBase): - id: str - name: str - relation: RelationProp - type: str = "relation" - - @classmethod - def from_dict(cls, data: dict): - return cls(relation=RelationProp.from_dict(data.pop("relation")), **data) - - -@dataclass -class RelationCell(DBCellBase): - id: str - has_more: bool - relation: list - type: str = "relation" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], unquote(self.id)) diff --git a/unstructured/ingest/connector/notion/types/database_properties/rich_text.py b/unstructured/ingest/connector/notion/types/database_properties/rich_text.py deleted file mode 100644 index 2bd56c2c9..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/rich_text.py +++ /dev/null @@ -1,43 +0,0 @@ -# https://developers.notion.com/reference/property-object#rich-text -from 
dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.rich_text import ( - RichText as RichTextType, -) - - -@dataclass -class RichText(DBPropertyBase): - id: str - name: str - type: str = "rich_text" - rich_text: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RichTextCell(DBCellBase): - id: str - rich_text: List[RichTextType] - name: Optional[str] = None - type: str = "rich_text" - - @classmethod - def from_dict(cls, data: dict): - return cls( - rich_text=[RichTextType.from_dict(rt) for rt in data.pop("rich_text", [])], - **data, - ) - - def get_html(self) -> Optional[HtmlTag]: - if not self.rich_text: - return None - spans = [Span([], rt.get_html()) for rt in self.rich_text] - return Div([], spans) diff --git a/unstructured/ingest/connector/notion/types/database_properties/rollup.py b/unstructured/ingest/connector/notion/types/database_properties/rollup.py deleted file mode 100644 index 5134b40c4..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/rollup.py +++ /dev/null @@ -1,56 +0,0 @@ -# https://developers.notion.com/reference/property-object#rollup -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class RollupProp(FromJSONMixin): - function: str - relation_property_id: str - relation_property_name: str - rollup_property_id: str - rollup_property_name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Rollup(DBPropertyBase): - id: str - name: str - rollup: RollupProp - type: str = "rollup" - - @classmethod - def from_dict(cls, data: dict): - return cls(rollup=RollupProp.from_dict(data.pop("rollup")), **data) - - -@dataclass -class RollupCell(DBCellBase): - id: str - rollup: dict - type: str = "rollup" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - rollup = self.rollup - t = rollup.get("type") - v = rollup[t] - if isinstance(v, list): - return Div([], [Span([], str(x)) for x in v]) - return Div([], str(v)) diff --git a/unstructured/ingest/connector/notion/types/database_properties/select.py b/unstructured/ingest/connector/notion/types/database_properties/select.py deleted file mode 100644 index 550f2ffed..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/select.py +++ /dev/null @@ -1,68 +0,0 @@ -# https://developers.notion.com/reference/property-object#select -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class SelectOption(FromJSONMixin): - color: str - id: str - name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class SelectProp(FromJSONMixin): - options: List[SelectOption] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return cls(options=[SelectOption.from_dict(o) for o 
in data.get("options", [])]) - - -@dataclass -class Select(DBPropertyBase): - id: str - name: str - select: SelectProp - type: str = "select" - - @classmethod - def from_dict(cls, data: dict): - return cls(select=SelectProp.from_dict(data.pop("select", {})), **data) - - -@dataclass -class SelectCell(DBCellBase): - id: str - select: Optional[SelectOption] - type: str = "select" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - select_data = data.pop("select") - select = None - if select_data: - select = SelectOption.from_dict(select_data) - return cls(select=select, **data) - - def get_html(self) -> Optional[HtmlTag]: - if select := self.select: - select_attr = [] - if select.color and select.color != "default": - select_attr.append(Style(f"color: {select.color}")) - return Div(select_attr, select.name) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/status.py b/unstructured/ingest/connector/notion/types/database_properties/status.py deleted file mode 100644 index 8139b98a6..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/status.py +++ /dev/null @@ -1,80 +0,0 @@ -# https://developers.notion.com/reference/property-object#status -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.attributes import Style -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class StatusOption(FromJSONMixin): - color: str - id: str - name: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class StatusGroup(FromJSONMixin): - color: str - id: str - name: str - option_ids: List[str] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class StatusProp(FromJSONMixin): - options: List[StatusOption] = field(default_factory=list) - groups: List[StatusGroup] = field(default_factory=list) - - @classmethod - def from_dict(cls, data: dict): - return cls( - options=[StatusOption.from_dict(o) for o in data.get("options", [])], - groups=[StatusGroup.from_dict(g) for g in data.get("groups", [])], - ) - - -@dataclass -class Status(DBPropertyBase): - id: str - name: str - status: StatusProp - type: str = "status" - - @classmethod - def from_dict(cls, data: dict): - return cls(status=StatusProp.from_dict(data.pop("status", {})), **data) - - -@dataclass -class StatusCell(DBCellBase): - id: str - status: Optional[StatusOption] - type: str = "status" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - status_data = data.pop("status", None) - return cls(status=StatusOption.from_dict(status_data) if status_data else None, **data) - - def get_html(self) -> Optional[HtmlTag]: - if status := self.status: - select_attr = [] - if status.color and status.color != "default": - select_attr.append(Style(f"color: {status.color}")) - return Div(select_attr, status.name) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/title.py b/unstructured/ingest/connector/notion/types/database_properties/title.py deleted file mode 100644 index aaee0e6ad..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/title.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/property-object#title -from dataclasses import dataclass, field -from typing import List, Optional - -from htmlBuilder.tags import Div, HtmlTag - -from 
unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase -from unstructured.ingest.connector.notion.types.rich_text import RichText - - -@dataclass -class Title(DBPropertyBase): - id: str - name: str - type: str = "title" - title: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class TitleCell(DBCellBase): - id: str - title: List[RichText] - type: str = "title" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(title=[RichText.from_dict(rt) for rt in data.pop("title", [])], **data) - - def get_html(self) -> Optional[HtmlTag]: - if not self.title: - return None - return Div([], [rt.get_html() for rt in self.title]) diff --git a/unstructured/ingest/connector/notion/types/database_properties/unique_id.py b/unstructured/ingest/connector/notion/types/database_properties/unique_id.py deleted file mode 100644 index 643f2c07a..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/unique_id.py +++ /dev/null @@ -1,50 +0,0 @@ -# https://developers.notion.com/reference/property-object#unique-id -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, -) - - -@dataclass -class UniqueID(DBPropertyBase): - id: str - name: str - type: str = "unique_id" - unique_id: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class UniqueIDCellData(FromJSONMixin): - prefix: str - number: int - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class UniqueIDCell(DBCellBase): - id: str - unique_id: Optional[UniqueIDCellData] - type: str = "unique_id" - name: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(unique_id=UniqueIDCellData.from_dict(data.pop("unique_id")), **data) - - def get_html(self) -> Optional[HtmlTag]: - if unique_id := self.unique_id: - return Div([], f"{unique_id.prefix}-{unique_id.number}") - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/url.py b/unstructured/ingest/connector/notion/types/database_properties/url.py deleted file mode 100644 index 8233ae9c2..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/url.py +++ /dev/null @@ -1,37 +0,0 @@ -# https://developers.notion.com/reference/property-object#url -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase - - -@dataclass -class URL(DBPropertyBase): - id: str - name: str - type: str = "url" - url: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class URLCell(DBCellBase): - id: str - url: Optional[str] = None - name: Optional[str] = None - type: str = "url" - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - if url := self.url: - return A([Href(url)], url) - return None diff --git a/unstructured/ingest/connector/notion/types/database_properties/verification.py b/unstructured/ingest/connector/notion/types/database_properties/verification.py deleted file mode 100644 index 
03ade8e3b..000000000 --- a/unstructured/ingest/connector/notion/types/database_properties/verification.py +++ /dev/null @@ -1,78 +0,0 @@ -# https://developers.notion.com/reference/property-object#url -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag, Span - -from unstructured.ingest.connector.notion.interfaces import ( - DBCellBase, - DBPropertyBase, - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.date import Date -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class Verification(DBPropertyBase): - id: str - name: str - type: str = "verification" - verification: dict = field(default_factory=dict) - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class VerificationData(FromJSONMixin, GetHTMLMixin): - state: Optional[str] - verified_by: Optional[People] - date: Optional[Date] - - @classmethod - def from_dict(cls, data: dict): - verified_by = data.pop("verified_by", None) - date = data.pop("date", None) - return cls( - verified_by=People.from_dict(data=verified_by) if verified_by else None, - date=Date.from_dict(data=date) if date else None, - **data, - ) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if state := self.state: - elements.append(Span([], state)) - if (verified_by := self.verified_by) and (verified_by_html := verified_by.get_html()): - elements.append(verified_by_html) - if (date := self.date) and (date_html := date.get_html()): - elements.append(date_html) - if elements: - return Div([], elements) - return None - - -@dataclass -class VerificationCell(DBCellBase): - id: str - verification: Optional[VerificationData] - name: Optional[str] = None - type: str = "verification" - - @classmethod - def from_dict(cls, data: dict): - return cls(verification=VerificationData.from_dict(data.pop("verification")), **data) - - def get_html(self) -> Optional[HtmlTag]: - elements = [] - if name := self.name: - elements.append(Span([], name)) - if (verification := self.verification) and (verification_html := verification.get_html()): - elements.append(verification_html) - - if elements: - return Div([], elements) - return None diff --git a/unstructured/ingest/connector/notion/types/date.py b/unstructured/ingest/connector/notion/types/date.py deleted file mode 100644 index 7c6dcf1fd..000000000 --- a/unstructured/ingest/connector/notion/types/date.py +++ /dev/null @@ -1,26 +0,0 @@ -# https://developers.notion.com/reference/property-value-object#date-property-values -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.tags import Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class Date(FromJSONMixin, GetHTMLMixin): - start: str - end: Optional[str] = None - time_zone: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - text = f"{self.start}" - if end := self.end: - text += f" - {end}" - if self.time_zone: - text += f" {self.time_zone}" - return Div([], text) diff --git a/unstructured/ingest/connector/notion/types/file.py b/unstructured/ingest/connector/notion/types/file.py deleted file mode 100644 index 6ade2d1e4..000000000 --- a/unstructured/ingest/connector/notion/types/file.py +++ /dev/null @@ -1,51 +0,0 @@ -# https://developers.notion.com/reference/file-object -from dataclasses import dataclass -from typing import 
Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class External(FromJSONMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class File(FromJSONMixin): - url: str - expiry_time: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class FileObject(FromJSONMixin, GetHTMLMixin): - type: str - external: Optional[External] = None - file: Optional[File] = None - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - file_object = cls(type=t) - if t == "external": - file_object.external = External.from_dict(data["external"]) - elif t == "file": - file_object.file = File.from_dict(data["file"]) - return file_object - - def get_html(self) -> Optional[HtmlTag]: - if self.file: - return A([Href(self.file.url)], self.file.url) - if self.external: - return A([Href(self.external.url)], self.external.url) - return None diff --git a/unstructured/ingest/connector/notion/types/page.py b/unstructured/ingest/connector/notion/types/page.py deleted file mode 100644 index 1bbda85c7..000000000 --- a/unstructured/ingest/connector/notion/types/page.py +++ /dev/null @@ -1,44 +0,0 @@ -# https://developers.notion.com/reference/page -from dataclasses import dataclass -from typing import Optional - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin -from unstructured.ingest.connector.notion.types.file import FileObject -from unstructured.ingest.connector.notion.types.parent import Parent -from unstructured.ingest.connector.notion.types.user import PartialUser - - -@dataclass -class Page(FromJSONMixin): - id: str - created_time: str - created_by: PartialUser - last_edited_time: str - last_edited_by: PartialUser - archived: bool - properties: dict - parent: Parent - url: str - public_url: str - request_id: Optional[str] = None - object: str = "page" - icon: Optional[FileObject] = None - cover: Optional[FileObject] = None - - @classmethod - def from_dict(cls, data: dict): - created_by = data.pop("created_by") - last_edited_by = data.pop("last_edited_by") - icon = data.pop("icon") - cover = data.pop("cover") - parent = data.pop("parent") - page = cls( - created_by=PartialUser.from_dict(created_by), - last_edited_by=PartialUser.from_dict(last_edited_by), - icon=FileObject.from_dict(icon) if icon else None, - cover=FileObject.from_dict(cover) if cover else None, - parent=Parent.from_dict(parent), - **data, - ) - - return page diff --git a/unstructured/ingest/connector/notion/types/parent.py b/unstructured/ingest/connector/notion/types/parent.py deleted file mode 100644 index f78c16673..000000000 --- a/unstructured/ingest/connector/notion/types/parent.py +++ /dev/null @@ -1,66 +0,0 @@ -# https://developers.notion.com/reference/parent-object -from dataclasses import dataclass - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin - - -# https://developers.notion.com/reference/parent-object#database-parent -@dataclass -class DatabaseParent(FromJSONMixin): - database_id: str - type: str = "database_id" - - @classmethod - def from_dict(cls, data: dict): - return cls(database_id=data["database_id"]) - - -# https://developers.notion.com/reference/parent-object#page-parent -@dataclass -class PageParent(FromJSONMixin): - page_id: str - type: str = "page_id" - - @classmethod - def from_dict(cls, data: dict): - return 
cls(page_id=data["page_id"]) - - -# https://developers.notion.com/reference/parent-object#workspace-parent -@dataclass -class WorkspaceParent(FromJSONMixin): - type: str = "workspace" - workspace: bool = True - - @classmethod - def from_dict(cls, data: dict): - return cls() - - -# https://developers.notion.com/reference/parent-object#block-parent -@dataclass -class BlockParent(FromJSONMixin): - block_id: str - type: str = "block_id" - - @classmethod - def from_dict(cls, data: dict): - return cls(block_id=data["block_id"]) - - -@dataclass -class Parent(FromJSONMixin): - block_id: str - type: str = "block_id" - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - if t == "database_id": - return DatabaseParent.from_dict(data) - elif t == "page_id": - return PageParent.from_dict(data) - elif t == "workspace": - return WorkspaceParent.from_dict(data) - elif t == "block_id": - return BlockParent.from_dict(data) diff --git a/unstructured/ingest/connector/notion/types/rich_text.py b/unstructured/ingest/connector/notion/types/rich_text.py deleted file mode 100644 index ae71a0a78..000000000 --- a/unstructured/ingest/connector/notion/types/rich_text.py +++ /dev/null @@ -1,189 +0,0 @@ -# https://developers.notion.com/reference/rich-text -from dataclasses import dataclass -from typing import Optional - -from htmlBuilder.attributes import Href, Style -from htmlBuilder.tags import A, B, Code, Div, HtmlTag, I, S, Span, U -from htmlBuilder.tags import Text as HtmlText - -from unstructured.ingest.connector.notion.interfaces import ( - FromJSONMixin, - GetHTMLMixin, -) -from unstructured.ingest.connector.notion.types.date import Date -from unstructured.ingest.connector.notion.types.user import People - - -@dataclass -class Annotations(FromJSONMixin): - bold: bool - code: bool - italic: bool - strikethrough: bool - underline: bool - color: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Equation(FromJSONMixin, GetHTMLMixin): - expression: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Code([], self.expression) if self.expression else None - - -@dataclass -class MentionDatabase(FromJSONMixin, GetHTMLMixin): - id: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.id) if self.id else None - - -@dataclass -class MentionLinkPreview(FromJSONMixin, GetHTMLMixin): - url: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return A([Href(self.url)], self.url) if self.url else None - - -@dataclass -class MentionPage(FromJSONMixin, GetHTMLMixin): - id: str - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_html(self) -> Optional[HtmlTag]: - return Div([], self.id) if self.id else None - - -@dataclass -class MentionTemplate(FromJSONMixin): - template_mention_date: Optional[str] - template_mention_user: Optional[str] - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class Mention(FromJSONMixin, GetHTMLMixin): - type: str - database: Optional[MentionDatabase] = None - date: Optional[Date] = None - link_preview: Optional[MentionLinkPreview] = None - page: Optional[MentionPage] = None - template_mention: Optional[MentionTemplate] = None - user: Optional[People] = None - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - 
mention = cls(type=t) - if t == "date": - mention.date = Date.from_dict(data["date"]) - elif t == "database": - mention.database = MentionDatabase.from_dict(data["database"]) - elif t == "link_preview": - mention.link_preview = MentionLinkPreview.from_dict(data["link_preview"]) - elif t == "page": - mention.page = MentionPage.from_dict(data["page"]) - elif t == "template_mention": - mention.template_mention = MentionTemplate.from_dict(data["template_mention"]) - elif t == "user": - mention.user = People.from_dict(data["user"]) - - return mention - - def get_html(self) -> Optional[HtmlTag]: - t = self.type - if t == "date": - return self.date.get_html() if self.date else None - elif t == "database": - return self.database.get_html() if self.database else None - elif t == "link_preview": - return self.link_preview.get_html() if self.link_preview else None - elif t == "page": - return self.page.get_html() if self.page else None - elif t == "user": - return self.user.get_html() if self.user else None - return None - - -@dataclass -class Text(FromJSONMixin): - content: str - link: Optional[dict] - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - -@dataclass -class RichText(FromJSONMixin, GetHTMLMixin): - type: str - plain_text: str - annotations: Optional[Annotations] = None - href: Optional[str] = None - text: Optional[Text] = None - mention: Optional[Mention] = None - equation: Optional[Equation] = None - - def get_html(self) -> Optional[HtmlTag]: - text = HtmlText(self.plain_text) - if self.href: - text = A([Href(self.href)], text) - if self.annotations: - annotations = self.annotations - if annotations.bold: - text = B([], text) - if annotations.code: - text = Code([], text) - if annotations.italic: - text = I([], text) - if annotations.strikethrough: - text = S([], text) - if annotations.underline: - text = U([], text) - if annotations.color and annotations.color != "default": - if isinstance(text, HtmlText): - text = Span([], text) - text.attributes.append(Style(f"color:{annotations.color}")) - return text - - @classmethod - def from_dict(cls, data: dict): - t = data["type"] - rich_text = cls( - annotations=Annotations.from_dict(data.pop("annotations")), - **data, - ) - if t == "text": - rich_text.text = Text.from_dict(data["text"]) - elif t == "mention": - rich_text.mention = Mention.from_dict(data["mention"]) - elif t == "equation": - rich_text.equation = Equation.from_dict(data["equation"]) - - return rich_text diff --git a/unstructured/ingest/connector/notion/types/user.py b/unstructured/ingest/connector/notion/types/user.py deleted file mode 100644 index 4574c0b8f..000000000 --- a/unstructured/ingest/connector/notion/types/user.py +++ /dev/null @@ -1,76 +0,0 @@ -# https://developers.notion.com/reference/user -from dataclasses import dataclass, field -from typing import Optional - -from htmlBuilder.attributes import Href -from htmlBuilder.tags import A, Div, HtmlTag - -from unstructured.ingest.connector.notion.interfaces import FromJSONMixin, GetHTMLMixin - - -@dataclass -class PartialUser(FromJSONMixin): - id: str - object: str = "user" - - @classmethod - def from_dict(cls, data: dict): - return cls(id=data["id"]) - - -@dataclass -class User(FromJSONMixin, GetHTMLMixin): - object: dict - id: str - type: Optional[str] = None - name: Optional[str] = None - avatar_url: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_text(self) -> Optional[str]: - text = self.name - if self.avatar_url: - text = 
f"[{text}]({self.avatar_url}" - return text - - def get_html(self) -> Optional[HtmlTag]: - if self.avatar_url: - return A([Href(self.avatar_url)], self.name) - else: - return Div([], self.name) - - -@dataclass -class People(User): - person: dict = field(default_factory=dict) - - -@dataclass -class Bots(FromJSONMixin, GetHTMLMixin): - object: dict - id: str - bot: dict - owner: dict - type: str - workspace_name: str - name: Optional[str] = None - avatar_url: Optional[str] = None - - @classmethod - def from_dict(cls, data: dict): - return cls(**data) - - def get_text(self) -> Optional[str]: - text = self.name - if self.avatar_url: - text = f"[{text}]({self.avatar_url}" - return text - - def get_html(self) -> Optional[HtmlTag]: - if self.avatar_url: - return A([Href(self.avatar_url)], self.name) - else: - return Div([], self.name) diff --git a/unstructured/ingest/connector/onedrive.py b/unstructured/ingest/connector/onedrive.py deleted file mode 100644 index 303e7f8fc..000000000 --- a/unstructured/ingest/connector/onedrive.py +++ /dev/null @@ -1,232 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.string_and_date_utils import ensure_isoformat_datetime -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from office365.graph_client import GraphClient - from office365.onedrive.driveitems.driveItem import DriveItem -MAX_MB_SIZE = 512_000_000 - - -@dataclass -class OneDriveAccessConfig(AccessConfig): - client_credential: str = enhanced_field(repr=False, sensitive=True, overload_name="client_cred") - - -@dataclass -class SimpleOneDriveConfig(BaseConnectorConfig): - access_config: OneDriveAccessConfig - client_id: str - user_pname: str - tenant: str = field(repr=False) - authority_url: t.Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - path: t.Optional[str] = field(default="") - recursive: bool = False - - def __post_init__(self): - if not (self.client_id and self.access_config.client_credential and self.user_pname): - raise ValueError( - "Please provide all the following mandatory values:" - "\n-ms-client_id\n-ms-client_cred\n-ms-user-pname", - ) - self.token_factory = self._acquire_token - - @SourceConnectionError.wrap - @requires_dependencies(["msal"]) - def _acquire_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.access_config.client_credential, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for OneDrive") - raise exc - return token - - -@dataclass -class OneDriveIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleOneDriveConfig - file_name: str - file_path: str - registry_name: str = "onedrive" - - def __post_init__(self): - self.ext = Path(self.file_name).suffix - if not self.ext: - raise ValueError("Unsupported file without extension.") - - 
self.server_relative_path = self.file_path + "/" + self.file_name - self._set_download_paths() - - def _set_download_paths(self) -> None: - """Parses the folder structure from the source and creates the download and output paths""" - download_path = Path(f"{self.read_config.download_dir}") - output_path = Path(f"{self.processor_config.output_dir}") - - if parent_path := self.file_path: - download_path = ( - download_path if parent_path == "" else (download_path / parent_path).resolve() - ) - output_path = ( - output_path if parent_path == "" else (output_path / parent_path).resolve() - ) - - self.download_dir = download_path - self.download_filepath = (download_path / self.file_name).resolve() - output_filename = output_filename = self.file_name + ".json" - self.output_dir = output_path - self.output_filepath = (output_path / output_filename).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "user_pname": self.connector_config.user_pname, - "server_relative_path": self.server_relative_path, - } - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["office365"], extras="onedrive") - def _fetch_file(self): - from office365.graph_client import GraphClient - - client = GraphClient(self.connector_config.token_factory) - root = client.users[self.connector_config.user_pname].drive.get().execute_query().root - file = root.get_by_path(self.server_relative_path).get().execute_query() - return file - - def update_source_metadata(self, **kwargs): - file = kwargs.get("file", self._fetch_file()) - if file is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - version = None - if (n_versions := len(file.versions)) > 0: - version = file.versions[n_versions - 1].properties.get("id", None) - - self.source_metadata = SourceMetadata( - date_created=ensure_isoformat_datetime(timestamp=file.created_datetime), - date_modified=ensure_isoformat_datetime(timestamp=file.last_modified_datetime), - version=version, - source_url=file.parent_reference.path + "/" + self.file_name, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - file = self._fetch_file() - self.update_source_metadata(file=file) - if file is None: - raise ValueError( - f"Failed to retrieve file {self.file_path}/{self.file_name}", - ) - - fsize = file.get_property("size", 0) - self.output_dir.mkdir(parents=True, exist_ok=True) - - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with self.filename.open(mode="wb") as f: - file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with self.filename.open(mode="wb") as f: - file.download(f).execute_query() - logger.info(f"File downloaded: {self.filename}") - return - - -@dataclass -class OneDriveSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleOneDriveConfig - _client: t.Optional["GraphClient"] = field(init=False, default=None) - - @property - def client(self) -> "GraphClient": - from office365.graph_client import GraphClient - - if self._client is None: - self._client = GraphClient(self.connector_config.token_factory) - 
return self._client - - @requires_dependencies(["office365"], extras="onedrive") - def initialize(self): - _ = self.client - - @requires_dependencies(["office365"], extras="onedrive") - def check_connection(self): - try: - token_resp: dict = self.connector_config.token_factory() - if error := token_resp.get("error"): - raise SourceConnectionError( - "{} ({})".format(error, token_resp.get("error_description")) - ) - _ = self.client - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def _list_objects(self, folder, recursive) -> t.List["DriveItem"]: - drive_items = folder.children.get().execute_query() - files = [d for d in drive_items if d.is_file] - if not recursive: - return files - folders = [d for d in drive_items if d.is_folder] - for f in folders: - files += self._list_objects(f, recursive) - return files - - def _gen_ingest_doc(self, file: "DriveItem") -> OneDriveIngestDoc: - file_path = file.parent_reference.path.split(":")[-1] - file_path = file_path[1:] if file_path[0] == "/" else file_path - return OneDriveIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - file_name=file.name, - file_path=file_path, - ) - - def get_ingest_docs(self): - root = self.client.users[self.connector_config.user_pname].drive.get().execute_query().root - if fpath := self.connector_config.path: - root = root.get_by_path(fpath).get().execute_query() - if root is None or not root.is_folder: - raise ValueError(f"Unable to find directory, given: {fpath}") - files = self._list_objects(root, self.connector_config.recursive) - return [self._gen_ingest_doc(f) for f in files] diff --git a/unstructured/ingest/connector/opensearch.py b/unstructured/ingest/connector/opensearch.py deleted file mode 100644 index 543bfbc39..000000000 --- a/unstructured/ingest/connector/opensearch.py +++ /dev/null @@ -1,219 +0,0 @@ -import typing as t -from dataclasses import dataclass, field - -from dataclasses_json.core import Json - -from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchDestinationConnector, - ElasticsearchDocumentMeta, - ElasticsearchIngestDoc, - ElasticsearchIngestDocBatch, - ElasticsearchSourceConnector, - SimpleElasticsearchConfig, -) -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import AccessConfig, BaseSingleIngestDoc -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import generator_batching_wbytes -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from opensearchpy import OpenSearch - -"""Since the actual OpenSearch project is a fork of Elasticsearch, we are relying -heavily on the Elasticsearch connector code, inheriting the functionality as much as possible.""" - - -@dataclass -class OpenSearchAccessConfig(AccessConfig): - hosts: t.Optional[t.List[str]] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - use_ssl: bool = False - verify_certs: bool = False - ssl_show_warn: bool = False - ca_certs: t.Optional[str] = None - client_cert: t.Optional[str] = None - client_key: t.Optional[str] = None - - def to_dict(self, **kwargs) -> t.Dict[str, Json]: - d = super().to_dict(**kwargs) 
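- # opensearch-py accepts HTTP basic auth as a single (username, password) tuple passed via its "http_auth" kwarg, so the config's separate username/password fields are folded into one entry here before the dict reaches the OpenSearch client constructor.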
- d["http_auth"] = (self.username, self.password) - return d - - -@dataclass -class SimpleOpenSearchConfig(SimpleElasticsearchConfig): - access_config: OpenSearchAccessConfig = None - - -@dataclass -class OpenSearchIngestDoc(ElasticsearchIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Current implementation creates a python OpenSearch client to fetch each doc, - rather than creating a client for each thread. - """ - - connector_config: SimpleOpenSearchConfig - registry_name: str = "opensearch" - - @SourceConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - pass - - -@dataclass -class OpenSearchIngestDocBatch(ElasticsearchIngestDocBatch): - connector_config: SimpleOpenSearchConfig - ingest_docs: t.List[OpenSearchIngestDoc] = field(default_factory=list) - registry_name: str = "opensearch_batch" - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def _get_docs(self): - from opensearchpy import OpenSearch - from opensearchpy.helpers import scan - - ops = OpenSearch(**self.connector_config.access_config.to_dict(apply_name_overload=False)) - scan_query = { - "_source": self.connector_config.fields, - "version": True, - "query": {"ids": {"values": self.list_of_ids}}, - } - - result = scan( - ops, - query=scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - return list(result) - - @SourceConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - def get_files(self): - documents = self._get_docs() - for doc in documents: - ingest_doc = OpenSearchIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - document=doc, - document_meta=ElasticsearchDocumentMeta( - self.connector_config.index_name, doc["_id"] - ), - ) - ingest_doc.update_source_metadata() - doc_body = doc["_source"] - filename = ingest_doc.filename - flattened_dict = flatten_dict(dictionary=doc_body) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - - filename.parent.mkdir(parents=True, exist_ok=True) - with open(filename, "w", encoding="utf8") as f: - f.write(concatenated_values) - self.ingest_docs.append(ingest_doc) - - -@dataclass -class OpenSearchSourceConnector(ElasticsearchSourceConnector): - """Fetches particular fields from all documents in a given opensearch cluster and index""" - - connector_config: SimpleOpenSearchConfig - _ops: t.Optional["OpenSearch"] = field(init=False, default=None) - - @property - def ops(self): - from opensearchpy import OpenSearch - - if self._ops is None: - self._ops = OpenSearch( - **self.connector_config.access_config.to_dict(apply_name_overload=False) - ) - return self._ops - - def check_connection(self): - try: - assert self.ops.ping() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def _get_doc_ids(self): - """Fetches all document ids in an index""" - from opensearchpy.helpers import scan - - hits = scan( - self.ops, - query=self.scan_query, - scroll="1m", - index=self.connector_config.index_name, - ) - - return [hit["_id"] for hit in hits] - - def get_ingest_docs(self): - """Fetches all documents in an index, using ids that are fetched with 
_get_doc_ids""" - ids = self._get_doc_ids() - id_batches = [ - ids[ - i - * self.connector_config.batch_size : (i + 1) # noqa - * self.connector_config.batch_size - ] - for i in range( - (len(ids) + self.connector_config.batch_size - 1) - // self.connector_config.batch_size - ) - ] - return [ - OpenSearchIngestDocBatch( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - list_of_ids=batched_ids, - ) - for batched_ids in id_batches - ] - - -@dataclass -class OpenSearchDestinationConnector(ElasticsearchDestinationConnector): - connector_config: SimpleOpenSearchConfig - _client: t.Optional["OpenSearch"] = field(init=False, default=None) - - @DestinationConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - def generate_client(self) -> "OpenSearch": - from opensearchpy import OpenSearch - - return OpenSearch(**self.connector_config.access_config.to_dict(apply_name_overload=False)) - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]]) -> None: - logger.info( - f"writing document batches to destination" - f" index named {self.connector_config.index_name}" - f" at {self.connector_config.access_config.hosts}" - f" with batch size (in bytes) {self.write_config.batch_size_bytes}" - f" with {self.write_config.num_processes} (number of) processes" - ) - from opensearchpy.helpers import parallel_bulk - - for batch in generator_batching_wbytes( - elements_dict, batch_size_limit_bytes=self.write_config.batch_size_bytes - ): - for success, info in parallel_bulk( - self.client, batch, thread_count=self.write_config.num_processes - ): - if not success: - logger.error( - "upload failed for a batch in opensearch destination connector:", info - ) diff --git a/unstructured/ingest/connector/outlook.py b/unstructured/ingest/connector/outlook.py deleted file mode 100644 index 58684a6db..000000000 --- a/unstructured/ingest/connector/outlook.py +++ /dev/null @@ -1,285 +0,0 @@ -import hashlib -import os -import typing as t -from collections import defaultdict -from dataclasses import dataclass, field -from itertools import chain -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -MAX_NUM_EMAILS = 1000000 # Maximum number of emails per folder -if t.TYPE_CHECKING: - from office365.graph_client import GraphClient - - -class MissingFolderError(Exception): - """There are no root folders with those names.""" - - -@dataclass -class OutlookAccessConfig(AccessConfig): - client_credential: str = enhanced_field(repr=False, sensitive=True, overload_name="client_cred") - - -@dataclass -class SimpleOutlookConfig(BaseConnectorConfig): - """This class is getting the token.""" - - access_config: OutlookAccessConfig - user_email: str - client_id: str - tenant: t.Optional[str] = field(repr=False, default="common") - authority_url: t.Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - outlook_folders: t.List[str] = field(default_factory=list) - recursive: bool = False - registry_name: str = 
"outlook" - - def __post_init__(self): - if not (self.client_id and self.access_config.client_credential and self.user_email): - raise ValueError( - "Please provide one of the following mandatory values:" - "\nclient_id\nclient_cred\nuser_email", - ) - self.token_factory = self._acquire_token - - @requires_dependencies(["msal"]) - def _acquire_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.access_config.client_credential, - ) - token = app.acquire_token_for_client( - scopes=["https://graph.microsoft.com/.default"], - ) - except ValueError as exc: - logger.error("Couldn't set up credentials for Outlook") - raise exc - return token - - @requires_dependencies(["office365"], extras="outlook") - def _get_client(self): - from office365.graph_client import GraphClient - - return GraphClient(self.token_factory) - - -@dataclass -class OutlookIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleOutlookConfig - message_id: str - registry_name: str = "outlook" - - def __post_init__(self): - self._set_download_paths() - - def hash_mail_name(self, id): - """Outlook email ids are 152 char long. Hash to shorten to 16.""" - return hashlib.sha256(id.encode("utf-8")).hexdigest()[:16] - - def _set_download_paths(self) -> None: - """Creates paths for downloading and parsing.""" - download_path = Path(f"{self.read_config.download_dir}") - output_path = Path(f"{self.processor_config.output_dir}") - - self.download_dir = download_path - self.download_filepath = ( - download_path / f"{self.hash_mail_name(self.message_id)}.eml" - ).resolve() - oname = f"{self.hash_mail_name(self.message_id)}.eml.json" - self.output_dir = output_path - self.output_filepath = (output_path / oname).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "message_id": self.message_id, - "user_email": self.connector_config.user_email, - } - - @requires_dependencies(["office365"], extras="outlook") - def update_source_metadata(self, **kwargs): - from office365.runtime.client_request_exception import ClientRequestException - - try: - client = self.connector_config._get_client() - msg = ( - client.users[self.connector_config.user_email] - .messages[self.message_id] - .get() - .execute_query() - ) - except ClientRequestException as e: - if e.response.status_code == 404: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - raise - self.source_metadata = SourceMetadata( - date_created=msg.created_datetime.isoformat(), - date_modified=msg.last_modified_datetime.isoformat(), - version=msg.get_property("changeKey"), - source_url=msg.get_property("webLink"), - exists=True, - ) - - @SourceConnectionNetworkError.wrap - def _run_download(self, local_file): - client = self.connector_config._get_client() - client.users[self.connector_config.user_email].messages[self.message_id].download( - local_file, - ).execute_query() - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(["office365"], extras="outlook") - def get_file(self): - """Relies on Office365 python sdk message object to do the download.""" - try: - self.connector_config._get_client() - self.update_source_metadata() - if 
not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - with open( - os.path.join( - self.download_dir, - self.hash_mail_name(self.message_id) + ".eml", - ), - "wb", - ) as local_file: - self._run_download(local_file=local_file) - - except Exception as e: - logger.error( - f"Error while downloading and saving file: {self.hash_mail_name(self.message_id)}.", - ) - logger.error(e) - return - logger.info(f"File downloaded: {self.hash_mail_name(self.message_id)}") - return - - -@dataclass -class OutlookSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleOutlookConfig - _client: t.Optional["GraphClient"] = field(init=False, default=None) - - @property - def client(self) -> "GraphClient": - if self._client is None: - self._client = self.connector_config._get_client() - return self._client - - def initialize(self): - try: - self.get_folder_ids() - except Exception as e: - raise SourceConnectionError(f"failed to validate connection: {e}") - - def check_connection(self): - try: - _ = self.client - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def recurse_folders(self, folder_id, main_folder_dict): - """We only get a count of subfolders for any folder. - Have to make additional calls to get subfolder ids.""" - subfolders = ( - self.client.users[self.connector_config.user_email] - .mail_folders[folder_id] - .child_folders.get() - .execute_query() - ) - for subfolder in subfolders: - for k, v in main_folder_dict.items(): - if subfolder.get_property("parentFolderId") in v: - v.append(subfolder.id) - if subfolder.get_property("childFolderCount") > 0: - self.recurse_folders(subfolder.id, main_folder_dict) - - def get_folder_ids(self): - """Sets the mail folder ids and subfolder ids for requested root mail folders.""" - self.root_folders = defaultdict(list) - root_folders_with_subfolders = [] - get_root_folders = ( - self.client.users[self.connector_config.user_email].mail_folders.get().execute_query() - ) - - for folder in get_root_folders: - self.root_folders[folder.display_name].append(folder.id) - if folder.get_property("childFolderCount") > 0: - root_folders_with_subfolders.append(folder.id) - - for folder in root_folders_with_subfolders: - self.recurse_folders(folder, self.root_folders) - - # Narrow down all mail folder ids (plus all subfolders) to the ones that were requested. - self.selected_folder_ids = list( - chain.from_iterable( - [ - v - for k, v in self.root_folders.items() - if k.lower() in [x.lower() for x in self.connector_config.outlook_folders] - ], - ), - ) - if not self.selected_folder_ids: - raise MissingFolderError( - "There are no root folders with the names: " - f"{self.connector_config.outlook_folders}", - ) - - def get_ingest_docs(self): - """Returns a list of all the message objects that are in the requested root folder(s).""" - filtered_messages = [] - - # Get all the relevant messages in the selected folders/subfolders. - for folder_id in self.selected_folder_ids: - messages = ( - self.client.users[self.connector_config.user_email] - .mail_folders[folder_id] - .messages.get() - .top(MAX_NUM_EMAILS) # Prevents the return from paging - .execute_query() - ) - # Skip empty list if there are no messages in folder. 
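# For context on the auth that backs these Graph calls: a minimal sketch of the
# MSAL client-credentials flow that SimpleOutlookConfig._acquire_token wraps
# above; the tenant, client id, and secret are illustrative placeholders.
from msal import ConfidentialClientApplication

demo_app = ConfidentialClientApplication(
    client_id="00000000-0000-0000-0000-000000000000",  # placeholder app id
    authority="https://login.microsoftonline.com/common",  # or a specific tenant
    client_credential="placeholder-client-secret",
)
demo_token = demo_app.acquire_token_for_client(
    scopes=["https://graph.microsoft.com/.default"],
)
if "error" in demo_token:  # MSAL reports failures inside the token dict
    raise RuntimeError(demo_token.get("error_description"))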
- if messages: - filtered_messages.append(messages) - return [ - OutlookIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - message_id=message.id, - ) - for message in list(chain.from_iterable(filtered_messages)) - ] diff --git a/unstructured/ingest/connector/pinecone.py b/unstructured/ingest/connector/pinecone.py deleted file mode 100644 index 6599185a1..000000000 --- a/unstructured/ingest/connector/pinecone.py +++ /dev/null @@ -1,142 +0,0 @@ -import copy -import json -import multiprocessing as mp -import typing as t -import uuid -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - ConfigSessionHandleMixin, - IngestDocSessionHandleMixin, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from pinecone import Index as PineconeIndex - - -@dataclass -class PineconeAccessConfig(AccessConfig): - api_key: str = enhanced_field(sensitive=True) - - -@dataclass -class SimplePineconeConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - index_name: str - environment: str - access_config: PineconeAccessConfig - - -@dataclass -class PineconeWriteConfig(WriteConfig): - batch_size: int = 50 - num_processes: int = 1 - - -@dataclass -class PineconeDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector): - write_config: PineconeWriteConfig - connector_config: SimplePineconeConfig - _index: t.Optional["PineconeIndex"] = None - - def to_dict(self, **kwargs): - """ - The _index variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_index"): - setattr(self_cp, "_index", None) - return _asdict(self_cp, **kwargs) - - @property - def pinecone_index(self): - if self._index is None: - self._index = self.create_index() - return self._index - - def initialize(self): - pass - - @requires_dependencies(["pinecone"], extras="pinecone") - def create_index(self) -> "PineconeIndex": - from pinecone import Pinecone - - from unstructured import __version__ as unstructured_version - - pc = Pinecone( - api_key=self.connector_config.access_config.api_key, - source_tag=f"unstructured=={unstructured_version}", - ) - - index = pc.Index(self.connector_config.index_name) - logger.debug(f"Connected to index: {pc.describe_index(self.connector_config.index_name)}") - return index - - @DestinationConnectionError.wrap - def check_connection(self): - _ = self.pinecone_index - - @DestinationConnectionError.wrap - @requires_dependencies(["pinecone"], extras="pinecone") - def upsert_batch(self, batch): - import pinecone.exceptions - - index = self.pinecone_index - try: - response = index.upsert(batch) - except pinecone.exceptions.PineconeApiException as api_error: - raise WriteError(f"http error: {api_error}") from api_error - logger.debug(f"results: {response}") - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], 
**kwargs) -> None: - logger.info( - f"Upserting {len(elements_dict)} elements to destination " - f"index at {self.connector_config.index_name}", - ) - - pinecone_batch_size = self.write_config.batch_size - - logger.info(f"using {self.write_config.num_processes} processes to upload") - if self.write_config.num_processes == 1: - for chunk in batch_generator(elements_dict, pinecone_batch_size): - self.upsert_batch(chunk) # noqa: E203 - - else: - with mp.Pool( - processes=self.write_config.num_processes, - ) as pool: - pool.map( - self.upsert_batch, list(batch_generator(elements_dict, pinecone_batch_size)) - ) - - def normalize_dict(self, element_dict: dict) -> dict: - # While flatten_dict enables indexing on various fields, - # element_serialized enables easily reloading the element object to memory. - # element_serialized is formed without text/embeddings to avoid data bloating. - return { - "id": str(uuid.uuid4()), - "values": element_dict.pop("embeddings", None), - "metadata": { - "text": element_dict.pop("text", None), - "element_serialized": json.dumps(element_dict), - **flatten_dict( - element_dict, - separator="-", - flatten_lists=True, - remove_none=True, - ), - }, - } diff --git a/unstructured/ingest/connector/qdrant.py b/unstructured/ingest/connector/qdrant.py deleted file mode 100644 index da19c2dae..000000000 --- a/unstructured/ingest/connector/qdrant.py +++ /dev/null @@ -1,145 +0,0 @@ -import json -import multiprocessing as mp -import typing as t -import uuid -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - ConfigSessionHandleMixin, - IngestDocSessionHandleMixin, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from qdrant_client import QdrantClient - - -@dataclass -class QdrantAccessConfig(AccessConfig): - api_key: t.Optional[str] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleQdrantConfig(ConfigSessionHandleMixin, BaseConnectorConfig): - collection_name: str - location: t.Optional[str] = None - url: t.Optional[str] = None - port: t.Optional[int] = 6333 - grpc_port: t.Optional[int] = 6334 - prefer_grpc: t.Optional[bool] = False - https: t.Optional[bool] = None - prefix: t.Optional[str] = None - timeout: t.Optional[float] = None - host: t.Optional[str] = None - path: t.Optional[str] = None - force_disable_check_same_thread: t.Optional[bool] = False - access_config: t.Optional[QdrantAccessConfig] = None - - -@dataclass -class QdrantWriteConfig(WriteConfig): - batch_size: int = 50 - num_processes: int = 1 - - -@dataclass -class QdrantDestinationConnector(IngestDocSessionHandleMixin, BaseDestinationConnector): - write_config: QdrantWriteConfig - connector_config: SimpleQdrantConfig - _client: t.Optional["QdrantClient"] = None - - @property - def qdrant_client(self): - if self._client is None: - self._client = self.create_client() - return self._client - - def initialize(self): - ... 
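# Both the Pinecone writer above and the Qdrant writer below chunk elements with
# batch_generator from unstructured.ingest.utils.data_prep before upserting. A
# self-contained sketch in the same spirit (an illustration, not the library code):
import typing as t

def demo_batch_generator(items: t.Iterable, batch_size: int) -> t.Iterator[list]:
    """Yield successive lists of at most batch_size items."""
    batch: list = []
    for item in items:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:  # trailing partial batch
        yield batch

# list(demo_batch_generator(range(5), 2)) -> [[0, 1], [2, 3], [4]]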
# fmt: skip - - @requires_dependencies(["qdrant_client"], extras="qdrant") - def create_client(self) -> "QdrantClient": - from qdrant_client import QdrantClient - - client = QdrantClient( - location=self.connector_config.location, - url=self.connector_config.url, - port=self.connector_config.port, - grpc_port=self.connector_config.grpc_port, - prefer_grpc=self.connector_config.prefer_grpc, - https=self.connector_config.https, - api_key=( - self.connector_config.access_config.api_key - if self.connector_config.access_config - else None - ), - prefix=self.connector_config.prefix, - timeout=self.connector_config.timeout, - host=self.connector_config.host, - path=self.connector_config.path, - force_disable_check_same_thread=self.connector_config.force_disable_check_same_thread, - ) - - return client - - @DestinationConnectionError.wrap - def check_connection(self): - self.qdrant_client.get_collections() - - @DestinationConnectionError.wrap - @requires_dependencies(["qdrant_client"], extras="qdrant") - def upsert_batch(self, batch: t.List[t.Dict[str, t.Any]]): - from qdrant_client import models - - client = self.qdrant_client - try: - points: list[models.PointStruct] = [models.PointStruct(**item) for item in batch] - response = client.upsert( - self.connector_config.collection_name, points=points, wait=True - ) - except Exception as api_error: - raise WriteError(f"Qdrant error: {api_error}") from api_error - logger.debug(f"results: {response}") - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"Upserting {len(elements_dict)} elements to " - f"{self.connector_config.collection_name}", - ) - - qdrant_batch_size = self.write_config.batch_size - - logger.info(f"using {self.write_config.num_processes} processes to upload") - if self.write_config.num_processes == 1: - for chunk in batch_generator(elements_dict, qdrant_batch_size): - self.upsert_batch(chunk) - - else: - with mp.Pool( - processes=self.write_config.num_processes, - ) as pool: - pool.map(self.upsert_batch, list(batch_generator(elements_dict, qdrant_batch_size))) - - def normalize_dict(self, element_dict: dict) -> dict: - return { - "id": str(uuid.uuid4()), - "vector": element_dict.pop("embeddings", {}), - "payload": { - "text": element_dict.pop("text", None), - "element_serialized": json.dumps(element_dict), - **flatten_dict( - element_dict, - separator="-", - flatten_lists=True, - ), - }, - } diff --git a/unstructured/ingest/connector/reddit.py b/unstructured/ingest/connector/reddit.py deleted file mode 100644 index 18f8ba7c7..000000000 --- a/unstructured/ingest/connector/reddit.py +++ /dev/null @@ -1,166 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from praw import Reddit - - -@dataclass -class RedditAccessConfig(AccessConfig): - client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - - -@dataclass -class SimpleRedditConfig(BaseConnectorConfig): - access_config: RedditAccessConfig - 
subreddit_name: str - num_posts: int - user_agent: str - client_id: str - search_query: t.Optional[str] = None - - def __post_init__(self): - if self.num_posts <= 0: - raise ValueError("The number of Reddit posts to fetch must be positive.") - - -@dataclass -class RedditIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleRedditConfig = field(repr=False) - post_id: str - registry_name: str = "reddit" - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["praw"]) - def get_post(self): - from praw import Reddit - from praw.models import Submission - - reddit = Reddit( - client_id=self.connector_config.client_id, - client_secret=self.connector_config.access_config.client_secret, - user_agent=self.connector_config.user_agent, - ) - post = Submission(reddit, self.post_id) - return post - - def update_source_metadata(self, **kwargs): - post = kwargs.get("post", self.get_post()) - if post is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - file_exists = (post.author != "[deleted]" or post.auth is not None) and ( - post.selftext != "[deleted]" or post.selftext != "[removed]" - ) - - self.source_metadata = SourceMetadata( - date_created=datetime.utcfromtimestamp(post.created_utc).isoformat(), - source_url=post.permalink, - exists=file_exists, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - # Write the title plus the body, if any - post = self.get_post() - self.update_source_metadata(post=post) - if post is None: - raise ValueError( - f"Failed to retrieve post {self.post_id}", - ) - - text_to_write = f"# {post.title}\n{post.selftext}" - with open(self.filename, "w", encoding="utf8") as f: - f.write(text_to_write) - - @property - def filename(self) -> Path: - return (Path(self.read_config.download_dir) / f"{self.post_id}.md").resolve() - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.post_id}.json" - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def version(self) -> t.Optional[str]: - return None - - -@dataclass -class RedditSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleRedditConfig - _reddit: t.Optional["Reddit"] = field(init=False, default=None) - - @property - def reddit(self) -> "Reddit": - from praw import Reddit - - if self._reddit is None: - self._reddit = Reddit( - client_id=self.connector_config.client_id, - client_secret=self.connector_config.access_config.client_secret, - user_agent=self.connector_config.user_agent, - ) - return self._reddit - - @requires_dependencies(["praw"], extras="reddit") - def initialize(self): - _ = self.reddit - - def check_connection(self): - from praw.endpoints import API_PATH - from prawcore import ResponseException - - try: - self.reddit._objectify_request(method="HEAD", params=None, path=API_PATH["me"]) - except ResponseException as response_error: - logger.error(f"failed to validate connection: {response_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {response_error}") - - def get_ingest_docs(self): - subreddit = self.reddit.subreddit(self.connector_config.subreddit_name) - if self.connector_config.search_query: - posts = subreddit.search( - 
self.connector_config.search_query, - limit=self.connector_config.num_posts, - ) - else: - posts = subreddit.hot(limit=self.connector_config.num_posts) - return [ - RedditIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - post_id=post.id, - ) - for post in posts - ] diff --git a/unstructured/ingest/connector/registry.py b/unstructured/ingest/connector/registry.py deleted file mode 100644 index 35250d6f0..000000000 --- a/unstructured/ingest/connector/registry.py +++ /dev/null @@ -1,109 +0,0 @@ -import json -from typing import Dict, Type, cast - -from unstructured.ingest.connector.airtable import AirtableIngestDoc -from unstructured.ingest.connector.astradb import AstraDBIngestDoc -from unstructured.ingest.connector.biomed import BiomedIngestDoc -from unstructured.ingest.connector.confluence import ConfluenceIngestDoc -from unstructured.ingest.connector.delta_table import DeltaTableIngestDoc -from unstructured.ingest.connector.discord import DiscordIngestDoc -from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchIngestDoc, - ElasticsearchIngestDocBatch, -) -from unstructured.ingest.connector.fsspec.azure import AzureBlobStorageIngestDoc -from unstructured.ingest.connector.fsspec.box import BoxIngestDoc -from unstructured.ingest.connector.fsspec.dropbox import DropboxIngestDoc -from unstructured.ingest.connector.fsspec.gcs import GcsIngestDoc -from unstructured.ingest.connector.fsspec.s3 import S3IngestDoc -from unstructured.ingest.connector.fsspec.sftp import SftpIngestDoc -from unstructured.ingest.connector.github import GitHubIngestDoc -from unstructured.ingest.connector.gitlab import GitLabIngestDoc -from unstructured.ingest.connector.google_drive import GoogleDriveIngestDoc -from unstructured.ingest.connector.hubspot import HubSpotIngestDoc -from unstructured.ingest.connector.jira import JiraIngestDoc -from unstructured.ingest.connector.kafka import KafkaIngestDoc -from unstructured.ingest.connector.local import LocalIngestDoc -from unstructured.ingest.connector.mongodb import MongoDBIngestDoc, MongoDBIngestDocBatch -from unstructured.ingest.connector.notion.connector import ( - NotionDatabaseIngestDoc, - NotionPageIngestDoc, -) -from unstructured.ingest.connector.onedrive import OneDriveIngestDoc -from unstructured.ingest.connector.opensearch import OpenSearchIngestDoc, OpenSearchIngestDocBatch -from unstructured.ingest.connector.outlook import OutlookIngestDoc -from unstructured.ingest.connector.reddit import RedditIngestDoc -from unstructured.ingest.connector.salesforce import SalesforceIngestDoc -from unstructured.ingest.connector.sharepoint import SharepointIngestDoc -from unstructured.ingest.connector.slack import SlackIngestDoc -from unstructured.ingest.connector.wikipedia import ( - WikipediaIngestHTMLDoc, - WikipediaIngestSummaryDoc, - WikipediaIngestTextDoc, -) -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseIngestDoc - -INGEST_DOC_NAME_TO_CLASS: Dict[str, Type[EnhancedDataClassJsonMixin]] = { - "airtable": AirtableIngestDoc, - "astradb": AstraDBIngestDoc, - "azure": AzureBlobStorageIngestDoc, - "biomed": BiomedIngestDoc, - "box": BoxIngestDoc, - "confluence": ConfluenceIngestDoc, - "delta-table": DeltaTableIngestDoc, - "discord": DiscordIngestDoc, - "dropbox": DropboxIngestDoc, - "elasticsearch": ElasticsearchIngestDoc, - "elasticsearch_batch": ElasticsearchIngestDocBatch, - "gcs": GcsIngestDoc, - 
"github": GitHubIngestDoc, - "gitlab": GitLabIngestDoc, - "google_drive": GoogleDriveIngestDoc, - "hubspot": HubSpotIngestDoc, - "jira": JiraIngestDoc, - "kafka": KafkaIngestDoc, - "local": LocalIngestDoc, - "mongodb": MongoDBIngestDoc, - "mongodb_batch": MongoDBIngestDocBatch, - "notion_database": NotionDatabaseIngestDoc, - "notion_page": NotionPageIngestDoc, - "onedrive": OneDriveIngestDoc, - "opensearch": OpenSearchIngestDoc, - "opensearch_batch": OpenSearchIngestDocBatch, - "outlook": OutlookIngestDoc, - "reddit": RedditIngestDoc, - "s3": S3IngestDoc, - "salesforce": SalesforceIngestDoc, - "sftp": SftpIngestDoc, - "sharepoint": SharepointIngestDoc, - "slack": SlackIngestDoc, - "wikipedia_html": WikipediaIngestHTMLDoc, - "wikipedia_text": WikipediaIngestTextDoc, - "wikipedia_summary": WikipediaIngestSummaryDoc, -} - - -def create_ingest_doc_from_json(ingest_doc_json: str) -> BaseIngestDoc: - try: - ingest_doc_dict: dict = json.loads(ingest_doc_json) - except TypeError as te: - raise TypeError( - f"failed to load json string when deserializing IngestDoc: {ingest_doc_json}", - ) from te - return create_ingest_doc_from_dict(ingest_doc_dict) - - -def create_ingest_doc_from_dict(ingest_doc_dict: dict) -> BaseIngestDoc: - ingest_doc_dict = ingest_doc_dict.copy() - if "registry_name" not in ingest_doc_dict: - raise ValueError(f"registry_name not present in ingest doc: {ingest_doc_dict}") - registry_name = ingest_doc_dict.pop("registry_name") - try: - ingest_doc_cls = INGEST_DOC_NAME_TO_CLASS[registry_name] - return cast(BaseIngestDoc, ingest_doc_cls.from_dict(ingest_doc_dict)) - except KeyError: - raise ValueError( - f"Error: Received unknown IngestDoc name: {registry_name} while deserializing", - "IngestDoc.", - ) diff --git a/unstructured/ingest/connector/salesforce.py b/unstructured/ingest/connector/salesforce.py deleted file mode 100644 index b17810120..000000000 --- a/unstructured/ingest/connector/salesforce.py +++ /dev/null @@ -1,301 +0,0 @@ -""" -Salesforce Connector -Able to download Account, Case, Campaign, EmailMessage, Lead -Salesforce returns everything as a list of json. -This saves each entry as a separate file to be partitioned. 
-Using JWT authorization -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_key_and_cert.htm -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_connected_app.htm -""" - -import json -import typing as t -from collections import OrderedDict -from dataclasses import dataclass, field -from datetime import datetime -from email.utils import formatdate -from pathlib import Path -from string import Template -from textwrap import dedent - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - - -class MissingCategoryError(Exception): - """There are no categories with that name.""" - - -SALESFORCE_API_VERSION = "57.0" - -ACCEPTED_CATEGORIES = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] - -EMAIL_TEMPLATE = Template( - """MIME-Version: 1.0 -Date: $date -Message-ID: $message_identifier -Subject: $subject -From: $from_email -To: $to_email -Content-Type: multipart/alternative; boundary="00000000000095c9b205eff92630" ---00000000000095c9b205eff92630 -Content-Type: text/plain; charset="UTF-8" -$textbody ---00000000000095c9b205eff92630 -Content-Type: text/html; charset="UTF-8" -$htmlbody ---00000000000095c9b205eff92630-- -""", -) - - -@dataclass -class SalesforceAccessConfig(AccessConfig): - consumer_key: str = enhanced_field(sensitive=True) - private_key: str = enhanced_field(sensitive=True) - - @requires_dependencies(["cryptography"]) - def get_private_key_value_and_type(self) -> t.Tuple[str, t.Type]: - from cryptography.hazmat.primitives import serialization - - try: - serialization.load_pem_private_key(data=self.private_key.encode("utf-8"), password=None) - except ValueError: - pass - else: - return self.private_key, str - - if Path(self.private_key).is_file(): - return self.private_key, Path - - raise ValueError("private_key does not contain PEM private key or path") - - -@dataclass -class SimpleSalesforceConfig(BaseConnectorConfig): - """Connector specific attributes""" - - access_config: SalesforceAccessConfig - categories: t.List[str] - username: str - recursive: bool = False - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def get_client(self): - from simple_salesforce import Salesforce - - pkey_value, pkey_type = self.access_config.get_private_key_value_and_type() - - return Salesforce( - username=self.username, - consumer_key=self.access_config.consumer_key, - privatekey_file=pkey_value if pkey_type is Path else None, - privatekey=pkey_value if pkey_type is str else None, - version=SALESFORCE_API_VERSION, - ) - - -@dataclass -class SalesforceIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleSalesforceConfig - record_type: str - record_id: str - registry_name: str = "salesforce" - _record: OrderedDict = field(default_factory=lambda: OrderedDict()) - - @property - def record(self): - if not self._record: - self._record = self.get_record() - return self._record - - def get_file_extension(self) -> str: - if self.record_type == "EmailMessage": - extension = ".eml" - elif self.record_type in ["Account", "Lead", "Case", "Campaign"]: - extension = ".xml" - 
else: - raise MissingCategoryError( - f"There are no categories with the name: {self.record_type}", - ) - return extension - - def _tmp_download_file(self) -> Path: - record_file = self.record_id + self.get_file_extension() - return Path(self.read_config.download_dir) / self.record_type / record_file - - @property - def _output_filename(self) -> Path: - record_file = self.record_id + self.get_file_extension() + ".json" - return Path(self.processor_config.output_dir) / self.record_type / record_file - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - def _xml_for_record(self, record: OrderedDict) -> str: - """Creates partitionable xml file from a record""" - import xml.etree.ElementTree as ET - - def flatten_dict(data, parent, prefix=""): - for key, value in data.items(): - if isinstance(value, OrderedDict): - flatten_dict(value, parent, prefix=f"{prefix}{key}.") - else: - item = ET.Element("item") - item.text = f"{prefix}{key}: {value}" - parent.append(item) - - root = ET.Element("root") - flatten_dict(record, root) - xml_string = ET.tostring(root, encoding="utf-8", xml_declaration=True).decode() - return xml_string - - def _eml_for_record(self, email_json: t.Dict[str, t.Any]) -> str: - from dateutil import parser # type: ignore - - """Recreates standard expected .eml format using template.""" - eml = EMAIL_TEMPLATE.substitute( - date=formatdate(parser.parse(email_json.get("MessageDate")).timestamp()), - message_identifier=email_json.get("MessageIdentifier"), - subject=email_json.get("Subject"), - from_email=email_json.get("FromAddress"), - to_email=email_json.get("ToAddress"), - textbody=email_json.get("TextBody"), - # TODO: This is a hack to get emails to process correctly. - # The HTML partitioner seems to have issues with
<br> and text without tags like <p>
- htmlbody=email_json.get("HtmlBody", "") # "" because you can't .replace None - .replace("<br>", "<br/><p>") - .replace("<p>", "</p><p>"), - ) - return dedent(eml)
- - @SourceConnectionNetworkError.wrap - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def _get_response(self): - client = self.connector_config.get_client() - response = client.query_all( - f"select FIELDS(STANDARD) from {self.record_type} where Id='{self.record_id}'", - ) - return response
- - def get_record(self) -> OrderedDict: - # Get record from Salesforce based on id - response = self._get_response() - logger.debug(f"response was returned for salesforce record id: {self.record_id}") - records = response["records"] - if not records: - raise ValueError( - f"No record found with record id {self.record_id}: {json.dumps(response)}" - ) - record_json = records[0] - return record_json
- - def update_source_metadata(self) -> None: # type: ignore - record_json = self.record - - date_format = "%Y-%m-%dT%H:%M:%S.000+0000" - self.source_metadata = SourceMetadata( - date_created=datetime.strptime(record_json["CreatedDate"], date_format).isoformat(), - date_modified=datetime.strptime( - record_json["LastModifiedDate"], - date_format, - ).isoformat(), - # SystemModstamp is Timestamp if record has been modified by person or automated system - version=record_json.get("SystemModstamp"), - source_url=record_json["attributes"].get("url"), - exists=True, - )
- - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Saves individual json records locally.""" - self._create_full_tmp_dir_path() - record = self.record - - self.update_source_metadata() - - try: - if self.record_type == "EmailMessage": - document = self._eml_for_record(record) - else: - document = self._xml_for_record(record) - - with open(self._tmp_download_file(), "w") as page_file: - page_file.write(document) - - except Exception as e: - logger.error( - f"Error while downloading and saving file: {self.record_id}.", - ) - logger.error(e)
- - @property - def filename(self): - """The filename of the file created from a Salesforce record""" - return self._tmp_download_file() - - -@dataclass -class SalesforceSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleSalesforceConfig - - def __post_init__(self): - self.ingest_doc_cls: t.Type[SalesforceIngestDoc] = SalesforceIngestDoc - - def initialize(self): - pass
- - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def check_connection(self): - from simple_salesforce.exceptions import SalesforceError - - try: - self.connector_config.get_client() - except SalesforceError as salesforce_error: - logger.error(f"failed to validate connection: {salesforce_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {salesforce_error}")
- - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def get_ingest_docs(self) -> t.List[SalesforceIngestDoc]: - """Get Salesforce Ids for the records. - Send them to next phase where each doc gets downloaded into the - appropriate format for partitioning.
- """ - from simple_salesforce.exceptions import SalesforceMalformedRequest - - client = self.connector_config.get_client() - - ingest_docs = [] - for record_type in self.connector_config.categories: - if record_type not in ACCEPTED_CATEGORIES: - raise ValueError(f"{record_type} not currently an accepted Salesforce category") - - try: - # Get ids from Salesforce - records = client.query_all( - f"select Id from {record_type}", - ) - for record in records["records"]: - ingest_docs.append( - SalesforceIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - record_type=record_type, - record_id=record["Id"], - ), - ) - except SalesforceMalformedRequest as e: - raise SalesforceMalformedRequest(f"Problem with Salesforce query: {e}") - - return ingest_docs diff --git a/unstructured/ingest/connector/sharepoint.py b/unstructured/ingest/connector/sharepoint.py deleted file mode 100644 index c65722404..000000000 --- a/unstructured/ingest/connector/sharepoint.py +++ /dev/null @@ -1,573 +0,0 @@ -import json -import os -import typing as t -from dataclasses import dataclass -from html import unescape -from pathlib import Path -from urllib.parse import urlparse - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.interfaces import PermissionsConfig as SharepointPermissionsConfig -from unstructured.ingest.logger import logger -from unstructured.ingest.utils.string_and_date_utils import ensure_isoformat_datetime -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from office365.sharepoint.client_context import ClientContext - from office365.sharepoint.files.file import File - from office365.sharepoint.publishing.pages.page import SitePage - -MAX_MB_SIZE = 512_000_000 -CONTENT_LABELS = ["CanvasContent1", "LayoutWebpartsContent1", "TimeCreated"] - - -@dataclass -class SharepointAccessConfig(AccessConfig): - client_cred: str = enhanced_field(repr=False, sensitive=True) - - -@dataclass -class SimpleSharepointConfig(BaseConnectorConfig): - access_config: SharepointAccessConfig - client_id: str - site: str - path: str - process_pages: bool = enhanced_field(default=True, init=False) - recursive: bool = False - files_only: bool = False - permissions_config: t.Optional[SharepointPermissionsConfig] = None - - def __post_init__(self): - if not (self.client_id and self.access_config.client_cred and self.site): - raise ValueError( - "Please provide one of the following mandatory values:" - "\n--client-id\n--client-cred\n--site", - ) - self.process_pages = not self.files_only - - @requires_dependencies(["office365"], extras="sharepoint") - def get_site_client(self, site_url: str = "") -> "ClientContext": - from office365.runtime.auth.client_credential import ClientCredential - from office365.sharepoint.client_context import ClientContext - - try: - site_client = ClientContext(site_url or self.site).with_credentials( - ClientCredential(self.client_id, self.access_config.client_cred), - ) - except Exception: - logger.error("Couldn't set Sharepoint client.") - raise - return site_client - - def get_permissions_client(self): - try: - permissions_connector = 
SharepointPermissionsConnector(self.permissions_config) - assert permissions_connector.access_token - return permissions_connector - except Exception as e: - logger.error("Couldn't obtain Sharepoint permissions ingestion access token:", e) - - -@dataclass -class SharepointIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleSharepointConfig - site_url: str - server_path: str - is_page: bool - file_path: str - registry_name: str = "sharepoint" - - def __post_init__(self): - self.extension = Path(self.file_path).suffix if not self.is_page else ".html" - self.extension = ".html" if self.extension == ".aspx" else self.extension - if not self.extension: - raise ValueError("Unsupported file without extension.") - - self._set_download_paths() - - def _set_download_paths(self) -> None: - """Parses the folder structure from the source and creates the download and output paths""" - download_path = Path(f"{self.read_config.download_dir}") - output_path = Path(f"{self.processor_config.output_dir}") - parent = Path(self.file_path).with_suffix(self.extension) - self.download_dir = (download_path / parent.parent).resolve() - self.download_filepath = (download_path / parent).resolve() - output_filename = str(parent) + ".json" - self.output_dir = (output_path / parent.parent).resolve() - self.output_filepath = (output_path / output_filename).resolve() - - @property - def filename(self): - return Path(self.download_filepath).resolve() - - @property - def _output_filename(self): - return Path(self.output_filepath).resolve() - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "server_path": self.server_path, - "site_url": self.site_url, - } - - @SourceConnectionNetworkError.wrap - @requires_dependencies(["office365"], extras="sharepoint") - def _fetch_file(self, properties_only: bool = False): - """Retrieves the actual page/file from the Sharepoint instance""" - from office365.runtime.client_request_exception import ClientRequestException - - site_client = self.connector_config.get_site_client(self.site_url) - - try: - if self.is_page: - file = site_client.web.get_file_by_server_relative_path("/" + self.server_path) - file = file.listItemAllFields.select(CONTENT_LABELS).get().execute_query() - else: - file = site_client.web.get_file_by_server_relative_url(self.server_path) - if properties_only: - file = file.get().execute_query() - except ClientRequestException as e: - if e.response.status_code == 404: - return None - raise - return file - - def _fetch_page(self): - site_client = self.connector_config.get_site_client(self.site_url) - try: - page = ( - site_client.site_pages.pages.get_by_url(self.server_path) - .expand(["FirstPublished", "Modified", "Version"]) - .get() - .execute_query() - ) - except Exception as e: - logger.error(f"Failed to retrieve page {self.server_path} from site {self.site_url}") - logger.error(e) - return None - return page - - def update_permissions_data(self): - def parent_name_matches(parent_type, permissions_filename, ingest_doc_filepath): - permissions_filename = permissions_filename.split("_SEP_") - ingest_doc_filepath = ingest_doc_filepath.split("/") - - if parent_type == "sites": - return permissions_filename[0] == ingest_doc_filepath[1] - - elif parent_type == "SitePages" or parent_type == "Shared Documents": - return True - - permissions_data = None - permissions_dir = Path(self.processor_config.output_dir) / "permissions_data" - - if permissions_dir.is_dir(): - parent_type = self.file_path.split("/")[0] - - if 
parent_type == "sites": - read_dir = permissions_dir / "sites" - elif parent_type == "SitePages" or parent_type == "Shared Documents": - read_dir = permissions_dir / "other" - else: - read_dir = permissions_dir / "other" - - for filename in os.listdir(read_dir): - permissions_docname = os.path.splitext(filename)[0].split("_SEP_")[1] - ingestdoc_docname = self.file_path.split("/")[-1] - - if ingestdoc_docname == permissions_docname and parent_name_matches( - parent_type=parent_type, - permissions_filename=filename, - ingest_doc_filepath=self.file_path, - ): - with open(read_dir / filename) as f: - permissions_data = json.loads(f.read()) - - return permissions_data - - def update_source_metadata(self, **kwargs): - if self.is_page: - page = self._fetch_page() - if page is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=page.get_property("FirstPublished", None), - date_modified=page.get_property("Modified", None), - version=page.get_property("Version", ""), - source_url=page.absolute_url, - exists=True, - permissions_data=( - self.update_permissions_data() - if self.connector_config.permissions_config - else None - ), - ) - return - - file = self._fetch_file(True) - if file is None: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - self.source_metadata = SourceMetadata( - date_created=ensure_isoformat_datetime(timestamp=file.time_created), - date_modified=ensure_isoformat_datetime(timestamp=file.time_last_modified), - version=file.major_version, - source_url=file.properties.get("LinkingUrl", None), - exists=True, - permissions_data=( - self.update_permissions_data() if self.connector_config.permissions_config else None - ), - ) - - def _download_page(self): - """Formats and saves locally page content""" - content = self._fetch_file() - self.update_source_metadata() - pld = (content.properties.get("LayoutWebpartsContent1", "") or "") + ( - content.properties.get("CanvasContent1", "") or "" - ) - if pld != "": - pld = unescape(pld) - else: - logger.info( - f"Page {self.server_path} has no retrievable content. \ - Dumping empty doc.", - ) - pld = "
<div></div>
" - - self.output_dir.mkdir(parents=True, exist_ok=True) - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - with self.filename.open(mode="w") as f: - f.write(pld) - logger.info(f"File downloaded: {self.filename}") - - def _download_file(self): - file = self._fetch_file() - self.update_source_metadata() - fsize = file.length - self.output_dir.mkdir(parents=True, exist_ok=True) - - if not self.download_dir.is_dir(): - logger.debug(f"Creating directory: {self.download_dir}") - self.download_dir.mkdir(parents=True, exist_ok=True) - - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with self.filename.open(mode="wb") as f: - file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with self.filename.open(mode="wb") as f: - file.download(f).execute_query() - logger.info(f"File downloaded: {self.filename}") - - @BaseSingleIngestDoc.skip_if_file_exists - @SourceConnectionError.wrap - @requires_dependencies(["office365"]) - def get_file(self): - if self.is_page: - self._download_page() - else: - self._download_file() - return - - -@dataclass -class SharepointSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleSharepointConfig - - def check_connection(self): - try: - site_client = self.connector_config.get_site_client() - site_client.site_pages.pages.get().execute_query() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - @requires_dependencies(["office365"], extras="sharepoint") - def _list_files(self, folder, recursive) -> t.List["File"]: - from office365.runtime.client_request_exception import ClientRequestException - - try: - objects = folder.expand(["Files", "Folders"]).get().execute_query() - files = list(objects.files) - if not recursive: - return files - for f in objects.folders: - if "/Forms" in f.serverRelativeUrl: - continue - files += self._list_files(f, recursive) - return files - except ClientRequestException as e: - if e.response.status_code != 404: - logger.info("Caught an error while processing documents %s", e.response.text) - return [] - - def _prepare_ingest_doc(self, obj: t.Union["File", "SitePage"], base_url, is_page=False): - if is_page: - file_path = obj.get_property("Url", "") - server_path = file_path if file_path[0] != "/" else file_path[1:] - if (url_path := (urlparse(base_url).path)) and (url_path != "/"): - file_path = url_path[1:] + "/" + file_path - else: - server_path = obj.serverRelativeUrl - file_path = obj.serverRelativeUrl[1:] - - return SharepointIngestDoc( - processor_config=self.processor_config, - read_config=self.read_config, - connector_config=self.connector_config, - site_url=base_url, - server_path=server_path, - is_page=is_page, - file_path=file_path, - ) - - @requires_dependencies(["office365"], extras="sharepoint") - def _list_pages(self, site_client) -> list: - from office365.runtime.client_request_exception import ClientRequestException - - try: - site_pages = site_client.site_pages.pages.get().execute_query() - except ClientRequestException as e: - logger.info( - "Caught an error while retrieving site pages from %s \n%s", - site_client.base_url, - e.response.text, - ) - return [] - - return [self._prepare_ingest_doc(page, site_client.base_url, True) for page in site_pages] - - def _ingest_site_docs(self, site_client) -> 
t.List["SharepointIngestDoc"]: - root_folder = site_client.web.get_folder_by_server_relative_path(self.connector_config.path) - files = self._list_files(root_folder, self.connector_config.recursive) - if not files: - logger.info( - f"No processable files at path {self.connector_config.path}\ - for site {site_client.base_url}", - ) - output = [] - for file in files: - try: - output.append(self._prepare_ingest_doc(file, site_client.base_url)) - except ValueError as e: - logger.error("Unable to process file %s", file.properties["Name"]) - logger.error(e) - if self.connector_config.process_pages: - page_output = self._list_pages(site_client) - if not page_output: - logger.info(f"Couldn't process pages for site {site_client.base_url}") - output = output + page_output - return output - - def initialize(self): - pass - - def get_ingest_docs(self): - base_site_client = self.connector_config.get_site_client() - - if not all( - getattr(self.connector_config.permissions_config, attr, False) - for attr in ["application_id", "client_cred", "tenant"] - ): - logger.info( - "Permissions config is not fed with 'application_id', 'client_cred' and 'tenant'." - "Skipping permissions ingestion.", - ) - else: - permissions_client = self.connector_config.get_permissions_client() - if permissions_client: - permissions_client.write_all_permissions(self.processor_config.output_dir) - - if not base_site_client.is_tenant: - return self._ingest_site_docs(base_site_client) - tenant = base_site_client.tenant - tenant_sites = tenant.get_site_properties_from_sharepoint_by_filters().execute_query() - tenant_sites = {s.url for s in tenant_sites if (s.url is not None)} - ingest_docs: t.List[SharepointIngestDoc] = [] - for site_url in tenant_sites: - logger.info(f"Processing docs for site: {site_url}") - site_client = self.connector_config.get_site_client(site_url) - ingest_docs = ingest_docs + self._ingest_site_docs(site_client) - return ingest_docs - - -@dataclass -class SharepointPermissionsConnector: - def __init__(self, permissions_config): - self.permissions_config: SharepointPermissionsConfig = permissions_config - self.initialize() - - def initialize(self): - self.access_token: str = self.get_access_token() - - @requires_dependencies(["requests"], extras="sharepoint") - def get_access_token(self) -> str: - import requests - - url = ( - f"https://login.microsoftonline.com/{self.permissions_config.tenant}/oauth2/v2.0/token" - ) - headers = {"Content-Type": "application/x-www-form-urlencoded"} - data = { - "client_id": self.permissions_config.application_id, - "scope": "https://graph.microsoft.com/.default", - "client_secret": self.permissions_config.client_cred, - "grant_type": "client_credentials", - } - response = requests.post(url, headers=headers, data=data) - return response.json()["access_token"] - - def validated_response(self, response): - if response.status_code == 200: - return response.json() - else: - logger.info(f"Request failed with status code {response.status_code}:") - logger.info(response.text) - - @requires_dependencies(["requests"], extras="sharepoint") - def get_sites(self): - import requests - - url = "https://graph.microsoft.com/v1.0/sites" - params = { - "$select": "webUrl, id", - } - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, params=params, headers=headers) - return self.validated_response(response) - - @requires_dependencies(["requests"], extras="sharepoint") - def get_drives(self, site): - import requests - - url = 
f"https://graph.microsoft.com/v1.0/sites/{site}/drives" - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, headers=headers) - - return self.validated_response(response) - - @requires_dependencies(["requests"], extras="sharepoint") - def get_drive_items(self, site, drive_id): - import requests - - url = f"https://graph.microsoft.com/v1.0/sites/{site}/drives/{drive_id}/root/children" - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, headers=headers) - - return self.validated_response(response) - - def extract_site_name_from_weburl(self, weburl): - split_path = urlparse(weburl).path.lstrip("/").split("/") - - if split_path[0] == "sites": - return "sites", split_path[1] - - elif split_path[0] == "Shared%20Documents": - return "Shared Documents", "Shared Documents" - - elif split_path[0] == "personal": - return "Personal", "Personal" - - elif split_path[0] == "_layouts": - return "layouts", "layouts" - - # if other weburl structures are found, additional logic might need to be implemented - - logger.warning( - """Couldn't extract sitename, unknown site or parent type. Skipping permissions - ingestion for the document with the URL:""", - weburl, - ) - - return None, None - - @requires_dependencies(["requests"], extras="sharepoint") - def get_permissions_for_drive_item(self, site, drive_id, item_id): - import requests - - url = f"https://graph.microsoft.com/v1.0/sites/ \ - {site}/drives/{drive_id}/items/{item_id}/permissions" - - headers = { - "Authorization": f"Bearer {self.access_token}", - } - - response = requests.get(url, headers=headers) - - return self.validated_response(response) - - def write_all_permissions(self, output_dir): - sites = [(site["id"], site["webUrl"]) for site in self.get_sites()["value"]] - drive_ids = [] - - logger.info("Obtaining drive data for sites for permissions (rbac)") - for site_id, site_url in sites: - drives = self.get_drives(site_id) - if drives: - drives_for_site = drives["value"] - drive_ids.extend([(site_id, drive["id"]) for drive in drives_for_site]) - - logger.info("Obtaining item data from drives for permissions (rbac)") - item_ids = [] - for site, drive_id in drive_ids: - drive_items = self.get_drive_items(site, drive_id) - if drive_items: - item_ids.extend( - [ - (site, drive_id, item["id"], item["name"], item["webUrl"]) - for item in drive_items["value"] - ], - ) - - permissions_dir = Path(output_dir) / "permissions_data" - - logger.info("Writing permissions data to disk") - for site, drive_id, item_id, item_name, item_web_url in item_ids: - res = self.get_permissions_for_drive_item(site, drive_id, item_id) - if res: - parent_type, parent_name = self.extract_site_name_from_weburl(item_web_url) - - if parent_type == "sites": - write_path = permissions_dir / "sites" / f"{parent_name}_SEP_{item_name}.json" - - elif parent_type == "Personal" or parent_type == "Shared Documents": - write_path = permissions_dir / "other" / f"{parent_name}_SEP_{item_name}.json" - else: - write_path = permissions_dir / "other" / f"{parent_name}_SEP_{item_name}.json" - - if not Path(os.path.dirname(write_path)).is_dir(): - os.makedirs(os.path.dirname(write_path)) - - with open(write_path, "w") as f: - json.dump(res["value"], f) diff --git a/unstructured/ingest/connector/slack.py b/unstructured/ingest/connector/slack.py deleted file mode 100644 index 4f6a8ce42..000000000 --- a/unstructured/ingest/connector/slack.py +++ /dev/null @@ -1,224 +0,0 @@ -import typing as t -import 
xml.etree.ElementTree as ET -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import ( - requires_dependencies, - validate_date_args, -) - -DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z") - - -@dataclass -class SlackAccessConfig(AccessConfig): - token: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleSlackConfig(BaseConnectorConfig): - """Connector config to process all messages by channel id's.""" - - access_config: SlackAccessConfig - channels: t.List[str] - start_date: t.Optional[str] = None - end_date: t.Optional[str] = None - - def validate_inputs(self): - oldest_valid = True - latest_valid = True - - if self.start_date: - oldest_valid = validate_date_args(self.start_date) - - if self.end_date: - latest_valid = validate_date_args(self.end_date) - - return oldest_valid, latest_valid - - def __post_init__(self): - oldest_valid, latest_valid = self.validate_inputs() - if not oldest_valid and not latest_valid: - raise ValueError( - "Start and/or End dates are not valid. ", - ) - - -@dataclass -class SlackIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - """Class encapsulating fetching a doc and writing processed results (but not - doing the processing!). - - Also includes a cleanup method. When things go wrong and the cleanup - method is not called, the file is left behind on the filesystem to assist debugging. 
- """ - - connector_config: SimpleSlackConfig - channel: str - registry_name: str = "slack" - - # NOTE(crag): probably doesn't matter, but intentionally not defining tmp_download_file - # __post_init__ for multiprocessing simplicity (no Path objects in initially - # instantiated object) - def _tmp_download_file(self): - channel_file = self.channel + ".xml" - return Path(self.read_config.download_dir) / channel_file - - @property - def _output_filename(self): - output_file = self.channel + ".json" - return Path(self.processor_config.output_dir) / output_file - - @property - def version(self) -> t.Optional[str]: - return None - - @property - def source_url(self) -> t.Optional[str]: - return None - - def _create_full_tmp_dir_path(self): - self._tmp_download_file().parent.mkdir(parents=True, exist_ok=True) - - @SourceConnectionNetworkError.wrap - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def _fetch_messages(self): - from slack_sdk import WebClient - - self.client = WebClient(token=self.connector_config.access_config.token) - oldest = "0" - latest = "0" - if self.connector_config.start_date: - oldest = self.convert_datetime(self.connector_config.start_date) - - if self.connector_config.end_date: - latest = self.convert_datetime(self.connector_config.end_date) - - result = self.client.conversations_history( - channel=self.channel, - oldest=oldest, - latest=latest, - ) - return result - - def update_source_metadata(self, **kwargs): - result = kwargs.get("result", self._fetch_messages()) - if result is None: - self.source_metadata = SourceMetadata( - exists=True, - ) - return - timestamps = [m["ts"] for m in result["messages"]] - timestamps.sort() - date_created = None - date_modified = None - if len(timestamps) > 0: - date_created = datetime.fromtimestamp(float(timestamps[0])).isoformat() - date_modified = datetime.fromtimestamp( - float(timestamps[len(timestamps) - 1]), - ).isoformat() - - self.source_metadata = SourceMetadata( - date_created=date_created, - date_modified=date_modified, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def get_file(self): - from slack_sdk.errors import SlackApiError - - """Fetches the data from a slack channel and stores it locally.""" - - self._create_full_tmp_dir_path() - - result = self._fetch_messages() - self.update_source_metadata(result=result) - root = ET.Element("messages") - for message in result["messages"]: - message_elem = ET.SubElement(root, "message") - text_elem = ET.SubElement(message_elem, "text") - text_elem.text = message.get("text") - - cursor = None - while True: - try: - response = self.client.conversations_replies( - channel=self.channel, - ts=message["ts"], - cursor=cursor, - ) - - for reply in response["messages"]: - reply_msg = reply.get("text") - text_elem.text = "".join([str(text_elem.text), " ", reply_msg]) - - if not response["has_more"]: - break - - cursor = response["response_metadata"]["next_cursor"] - - except SlackApiError as e: - logger.error(f"Error retrieving replies: {e.response['error']}") - tree = ET.ElementTree(root) - tree.write(self._tmp_download_file(), encoding="utf-8", xml_declaration=True) - - def convert_datetime(self, date_time): - for format in DATE_FORMATS: - try: - return datetime.strptime(date_time, format).timestamp() - except ValueError: - pass - - @property - def filename(self): - """The filename of the file created from a slack channel""" - return 
self._tmp_download_file() - - -class SlackSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - """Objects of this class support fetching document(s) from""" - - connector_config: SimpleSlackConfig - - @requires_dependencies(dependencies=["slack_sdk"], extras="slack") - def check_connection(self): - from slack_sdk import WebClient - from slack_sdk.errors import SlackClientError - - try: - client = WebClient(token=self.connector_config.access_config.token) - client.users_identity() - except SlackClientError as slack_error: - logger.error(f"failed to validate connection: {slack_error}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {slack_error}") - - def initialize(self): - """Verify that can get metadata for an object, validates connections info.""" - - def get_ingest_docs(self): - return [ - SlackIngestDoc( - connector_config=self.connector_config, - processor_config=self.processor_config, - read_config=self.read_config, - channel=channel, - ) - for channel in self.connector_config.channels - ] diff --git a/unstructured/ingest/connector/sql.py b/unstructured/ingest/connector/sql.py deleted file mode 100644 index 21f1f4a1f..000000000 --- a/unstructured/ingest/connector/sql.py +++ /dev/null @@ -1,196 +0,0 @@ -import copy -import json -import typing as t -import uuid -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -ELEMENTS_TABLE_NAME = "elements" - - -@dataclass -class SqlAccessConfig(AccessConfig): - username: t.Optional[str] - password: t.Optional[str] = enhanced_field(sensitive=True) - - -@dataclass -class SimpleSqlConfig(BaseConnectorConfig): - db_type: t.Optional[str] - host: t.Optional[str] - database: t.Optional[str] - port: t.Optional[int] - access_config: SqlAccessConfig - - def __post_init__(self): - if (self.db_type == "sqlite") and (self.database is None): - raise ValueError( - "A sqlite connection requires a path to a *.db file " - "through the `database` argument" - ) - - @property - def connection(self): - if self.db_type == "postgresql": - return self._make_psycopg_connection - elif self.db_type == "sqlite": - return self._make_sqlite_connection - raise ValueError(f"Unsupported database {self.db_type} connection.") - - def _make_sqlite_connection(self): - from sqlite3 import connect - - return connect(database=self.database) - - @requires_dependencies(["psycopg2"], extras="postgres") - def _make_psycopg_connection(self): - from psycopg2 import connect - - return connect( - user=self.access_config.username, - password=self.access_config.password, - dbname=self.database, - host=self.host, - port=self.port, - ) - - -@dataclass -class SqlDestinationConnector(BaseDestinationConnector): - connector_config: SimpleSqlConfig - _client: t.Optional[t.Any] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", 
None) - return _asdict(self_cp, **kwargs) - - @property - def client(self): - if self._client is None: - self._client = self.connector_config.connection() - return self._client - - @DestinationConnectionError.wrap - def initialize(self): - _ = self.client - - def check_connection(self): - try: - cursor = self.client.cursor() - cursor.execute("SELECT 1;") - cursor.close() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def conform_dict(self, data: dict) -> None: - """ - Updates the element dictionary to conform to the sql schema - """ - from datetime import datetime - - data["id"] = str(uuid.uuid4()) - - # Dict as string formatting - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - # Explicit casting otherwise fails schema type checking - data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator)) - - # Array of items as string formatting - if (embeddings := data.get("embeddings")) and ( - self.connector_config.db_type != "postgresql" - ): - data["embeddings"] = str(json.dumps(embeddings)) - - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = str(json.dumps(points)) - - if links := data.get("metadata", {}).get("links", {}): - data["metadata"]["links"] = str(json.dumps(links)) - - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - - if sent_from := data.get("metadata", {}).get("sent_from", {}): - data["metadata"]["sent_from"] = str(json.dumps(sent_from)) - - if sent_to := data.get("metadata", {}).get("sent_to", {}): - data["metadata"]["sent_to"] = str(json.dumps(sent_to)) - - # Datetime formatting - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = datetime.fromisoformat(date_created) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = datetime.fromisoformat(date_modified) - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = datetime.fromisoformat( - date_processed - ) - - if last_modified := data.get("metadata", {}).get("last_modified", {}): - data["metadata"]["last_modified"] = datetime.fromisoformat(last_modified) - - # String casting - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - if data.get("metadata", {}).get("data_source", None): - data.update(data.get("metadata", {}).pop("data_source", None)) - if data.get("metadata", {}).get("coordinates", None): - data.update(data.get("metadata", {}).pop("coordinates", None)) - if data.get("metadata", {}): - data.update(data.pop("metadata", None)) - - @DestinationConnectionError.wrap - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing {len(elements_dict)} objects to database {self.connector_config.database} " - f"at {self.connector_config.host}" - ) - - with self.client as conn: - cursor = 
conn.cursor() - - # Since we have no guarantee that each element will have the same keys - # we insert each element individually - for elem in elements_dict: - query = f"INSERT INTO {ELEMENTS_TABLE_NAME} ({','.join(elem.keys())}) \ - VALUES({','.join(['?' if self.connector_config.db_type=='sqlite' else '%s' for x in elem])})" # noqa E501 - values = [] - for v in elem.values(): - if self.connector_config.db_type == "sqlite" and isinstance(v, list): - values.append(json.dumps(v)) - else: - values.append(v) - cursor.execute(query, values) - - conn.commit() - cursor.close() - - # Leaving contexts doesn't close the connection, so doing it here - conn.close() diff --git a/unstructured/ingest/connector/vectara.py b/unstructured/ingest/connector/vectara.py deleted file mode 100644 index e94ff9c4f..000000000 --- a/unstructured/ingest/connector/vectara.py +++ /dev/null @@ -1,248 +0,0 @@ -import datetime -import json -import typing as t -import uuid -from dataclasses import dataclass, field - -import requests - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - BaseIngestDoc, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.staging.base import flatten_dict - -BASE_URL = "https://api.vectara.io/v1" - - -@dataclass -class VectaraAccessConfig(AccessConfig): - oauth_client_id: str = enhanced_field(sensitive=True) - oauth_secret: str = enhanced_field(sensitive=True) - - -@dataclass -class SimpleVectaraConfig(BaseConnectorConfig): - access_config: VectaraAccessConfig - customer_id: str - corpus_name: t.Optional[str] = None - corpus_id: t.Optional[str] = None - token_url: str = "https://vectara-prod-{}.auth.us-west-2.amazoncognito.com/oauth2/token" - - -@dataclass -class VectaraDestinationConnector(BaseDestinationConnector): - write_config: WriteConfig - connector_config: SimpleVectaraConfig - _jwt_token: t.Optional[str] = field(init=False, default=None) - _jwt_token_expires_ts: t.Optional[float] = field(init=False, default=None) - - @property - def jwt_token(self): - if ( - not self._jwt_token - or self._jwt_token_expires_ts - datetime.datetime.now().timestamp() <= 60 - ): - self._jwt_token = self._get_jwt_token() - return self._jwt_token - - @DestinationConnectionError.wrap - def vectara(self): - """ - Check the connection for Vectara and validate corpus exists. - - If more than one corpus with the same name exists - then return a message - - If exactly one corpus exists with this name - use it. - - If does not exist - create it. 
- """ - try: - # Get token if not already set - self.jwt_token - - list_corpora_response = self._request( - endpoint="list-corpora", - data={"numResults": 1, "filter": self.connector_config.corpus_name}, - ) - - possible_corpora_ids_names_map = { - corpus.get("id"): corpus.get("name") - for corpus in list_corpora_response.get("corpus") - if corpus.get("name") == self.connector_config.corpus_name - } - - if len(possible_corpora_ids_names_map) > 1: - return f"Multiple Corpora exist with name {self.connector_config.corpus_name}" - if len(possible_corpora_ids_names_map) == 1: - self.connector_config.corpus_id = list(possible_corpora_ids_names_map.keys())[0] - else: - data = { - "corpus": { - "name": self.connector_config.corpus_name, - } - } - create_corpus_response = self._request(endpoint="create-corpus", data=data) - self.connector_config.corpus_id = create_corpus_response.get("corpusId") - - except Exception as e: - logger.error(f"failed to create Vectara connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to create Vectara connection: {e}") - - def initialize(self): - self.vectara() - - def _request( - self, - endpoint: str, - http_method: str = "POST", - params: t.Mapping[str, t.Any] = None, - data: t.Mapping[str, t.Any] = None, - ): - url = f"{BASE_URL}/{endpoint}" - - headers = { - "Content-Type": "application/json", - "Accept": "application/json", - "Authorization": f"Bearer {self.jwt_token}", - "customer-id": self.connector_config.customer_id, - "X-source": "unstructured", - } - - response = requests.request( - method=http_method, url=url, headers=headers, params=params, data=json.dumps(data) - ) - response.raise_for_status() - return response.json() - - # Get Oauth2 JWT token - def _get_jwt_token(self): - """Connect to the server and get a JWT token.""" - token_endpoint = self.connector_config.token_url.format(self.connector_config.customer_id) - headers = { - "Content-Type": "application/x-www-form-urlencoded", - } - data = { - "grant_type": "client_credentials", - "client_id": self.connector_config.access_config.oauth_client_id, - "client_secret": self.connector_config.access_config.oauth_secret, - } - - response = requests.request(method="POST", url=token_endpoint, headers=headers, data=data) - response.raise_for_status() - response_json = response.json() - - request_time = datetime.datetime.now().timestamp() - self._jwt_token_expires_ts = request_time + response_json.get("expires_in") - - return response_json.get("access_token") - - @DestinationConnectionError.wrap - def check_connection(self): - try: - self.vectara() - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise DestinationConnectionError(f"failed to validate connection: {e}") - - def _delete_doc(self, doc_id: str) -> None: - """ - Delete a document from the Vectara corpus. - - Args: - url (str): URL of the page to delete. - doc_id (str): ID of the document to delete. 
- """ - body = { - "customer_id": self.connector_config.customer_id, - "corpus_id": self.connector_config.corpus_id, - "document_id": doc_id, - } - self._request(endpoint="delete-doc", data=body) - - def _index_document(self, document: t.Dict[str, t.Any]) -> None: - """ - Index a document (by uploading it to the Vectara corpus) from the document dictionary - """ - body = { - "customer_id": self.connector_config.customer_id, - "corpus_id": self.connector_config.corpus_id, - "document": document, - } - - try: - result = self._request(endpoint="index", data=body, http_method="POST") - except Exception as e: - logger.info(f"Exception {e} while indexing document {document['documentId']}") - return - - if ( - "status" in result - and result["status"] - and ( - "ALREADY_EXISTS" in result["status"]["code"] - or ( - "CONFLICT" in result["status"]["code"] - and "Indexing doesn't support updating documents" - in result["status"]["statusDetail"] - ) - ) - ): - logger.info(f"Document {document['documentId']} already exists, re-indexing") - self._delete_doc(document["documentId"]) - result = self._request(endpoint="index", data=body, http_method="POST") - return - - if "status" in result and result["status"] and "OK" in result["status"]["code"]: - logger.info(f"Indexing document {document['documentId']} succeeded") - else: - logger.info(f"Indexing document {document['documentId']} failed, response = {result}") - - def write_dict(self, *args, docs_list: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info(f"Inserting / updating {len(docs_list)} documents to Vectara ") - for vdoc in docs_list: - self._index_document(vdoc) - - def write(self, docs: t.List[BaseIngestDoc]) -> None: - docs_list: t.Dict[t.Dict[str, t.Any]] = [] - - def get_metadata(element) -> t.Dict[str, t.Any]: - """ - Select which meta-data fields to include and optionaly map them to a new new. 
- remove the "metadata-" prefix from the keys - """ - metadata_map = { - "page_number": "page_number", - "data_source-url": "url", - "filename": "filename", - "filetype": "filetype", - "last_modified": "last_modified", - } - md = flatten_dict(element, separator="-", flatten_lists=True) - md = {k.replace("metadata-", ""): v for k, v in md.items()} - md = {metadata_map[k]: v for k, v in md.items() if k in metadata_map} - return md - - for doc in docs: - local_path = doc._output_filename - with open(local_path) as json_file: - dict_content = json.load(json_file) - vdoc = { - "documentId": str(uuid.uuid4()), - "title": dict_content[0].get("metadata", {}).get("data_source", {}).get("url"), - "section": [ - { - "text": element.pop("text", None), - "metadataJson": json.dumps(get_metadata(element)), - } - for element in dict_content - ], - } - logger.info( - f"Extending {len(vdoc)} json elements from content in {local_path}", - ) - docs_list.append(vdoc) - self.write_dict(docs_list=docs_list) diff --git a/unstructured/ingest/connector/weaviate.py b/unstructured/ingest/connector/weaviate.py deleted file mode 100644 index 5039b2f99..000000000 --- a/unstructured/ingest/connector/weaviate.py +++ /dev/null @@ -1,187 +0,0 @@ -import copy -import json -import typing as t -from dataclasses import dataclass, field - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import DestinationConnectionError, SourceConnectionError -from unstructured.ingest.interfaces import ( - AccessConfig, - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from weaviate import Client - - -@dataclass -class WeaviateAccessConfig(AccessConfig): - access_token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - refresh_token: t.Optional[str] = enhanced_field(default=None, sensitive=True) - api_key: t.Optional[str] = enhanced_field(default=None, sensitive=True) - client_secret: t.Optional[str] = enhanced_field(default=None, sensitive=True) - scope: t.Optional[t.List[str]] = None - username: t.Optional[str] = None - password: t.Optional[str] = enhanced_field(default=None, sensitive=True) - anonymous: bool = False - - -@dataclass -class SimpleWeaviateConfig(BaseConnectorConfig): - access_config: WeaviateAccessConfig - host_url: str - class_name: str - - -@dataclass -class WeaviateWriteConfig(WriteConfig): - batch_size: int = 100 - - -@dataclass -class WeaviateDestinationConnector(BaseDestinationConnector): - write_config: WeaviateWriteConfig - connector_config: SimpleWeaviateConfig - _client: t.Optional["Client"] = field(init=False, default=None) - - def to_dict(self, **kwargs): - """ - The _client variable in this dataclass breaks deepcopy due to: - TypeError: cannot pickle '_thread.lock' object - When serializing, remove it, meaning client data will need to be reinitialized - when deserialized - """ - self_cp = copy.copy(self) - if hasattr(self_cp, "_client"): - setattr(self_cp, "_client", None) - return _asdict(self_cp, **kwargs) - - @property - @requires_dependencies(["weaviate"], extras="weaviate") - def client(self) -> "Client": - if self._client is None: - from weaviate import Client - - auth = self._resolve_auth_method() - self._client = Client(url=self.connector_config.host_url, auth_client_secret=auth) - return self._client - - 
@requires_dependencies(["weaviate"], extras="weaviate") - @DestinationConnectionError.wrap - def initialize(self): - _ = self.client - - @requires_dependencies(["weaviate"], extras="weaviate") - def check_connection(self): - try: - _ = self.client - except Exception as e: - logger.error(f"Failed to validate connection {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def _resolve_auth_method(self): - access_configs = self.connector_config.access_config - if access_configs.anonymous: - return None - - if access_configs.access_token: - from weaviate.auth import AuthBearerToken - - return AuthBearerToken( - access_token=access_configs.access_token, - refresh_token=access_configs.refresh_token, - ) - elif access_configs.api_key: - from weaviate.auth import AuthApiKey - - return AuthApiKey(api_key=access_configs.api_key) - elif access_configs.client_secret: - from weaviate.auth import AuthClientCredentials - - return AuthClientCredentials( - client_secret=access_configs.client_secret, scope=access_configs.scope - ) - elif access_configs.username and access_configs.password: - from weaviate.auth import AuthClientPassword - - return AuthClientPassword( - username=access_configs.username, - password=access_configs.password, - scope=access_configs.scope, - ) - return None - - def conform_dict(self, data: dict) -> None: - """ - Updates the element dictionary to conform to the Weaviate schema - """ - from dateutil import parser - - # Dict as string formatting - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - # Explicit casting otherwise fails schema type checking - data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator)) - - # Array of items as string formatting - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = str(json.dumps(points)) - - if links := data.get("metadata", {}).get("links", {}): - data["metadata"]["links"] = str(json.dumps(links)) - - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - - # Datetime formatting - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = parser.parse(date_created).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = parser.parse(date_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = parser.parse( - date_processed - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if last_modified := data.get("metadata", {}).get("last_modified", {}): - data["metadata"]["last_modified"] = parser.parse(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - # String casting - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - logger.info( - f"writing {len(elements_dict)} objects to 
destination " - f"class {self.connector_config.class_name} " - f"at {self.connector_config.host_url}", - ) - - self.client.batch.configure(batch_size=self.write_config.batch_size) - with self.client.batch as b: - for e in elements_dict: - vector = e.pop("embeddings", None) - b.add_data_object( - e, - self.connector_config.class_name, - vector=vector, - ) diff --git a/unstructured/ingest/connector/wikipedia.py b/unstructured/ingest/connector/wikipedia.py deleted file mode 100644 index 239e4636c..000000000 --- a/unstructured/ingest/connector/wikipedia.py +++ /dev/null @@ -1,208 +0,0 @@ -import typing as t -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - BaseSourceConnector, - IngestDocCleanupMixin, - SourceConnectorCleanupMixin, - SourceMetadata, -) -from unstructured.ingest.logger import logger -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from wikipedia import WikipediaPage - - -@dataclass -class SimpleWikipediaConfig(BaseConnectorConfig): - page_title: str - auto_suggest: bool = False - - -@dataclass -class WikipediaIngestDoc(IngestDocCleanupMixin, BaseSingleIngestDoc): - connector_config: SimpleWikipediaConfig = field(repr=False) - - @property - @requires_dependencies(["wikipedia"], extras="wikipedia") - def page(self) -> "WikipediaPage": - import wikipedia - - return wikipedia.page( - self.connector_config.page_title, - auto_suggest=self.connector_config.auto_suggest, - ) - - def get_filename_prefix(self) -> str: - title: str = str(self.connector_config.page_title) - title = " ".join(title.split()).replace(" ", "-") - return title - - @property - def filename(self) -> Path: - raise NotImplementedError() - - @property - def text(self) -> str: - raise NotImplementedError() - - @property - def _output_filename(self): - raise NotImplementedError() - - @property - def date_created(self) -> t.Optional[str]: - return None - - @property - def date_modified(self) -> t.Optional[str]: - return None - - @property - def record_locator(self) -> t.Optional[t.Dict[str, t.Any]]: - return { - "page_title": self.connector_config.page_title, - "page_url": self.source_metadata.source_url, # type: ignore - } - - def _create_full_tmp_dir_path(self): - self.filename.parent.mkdir(parents=True, exist_ok=True) - - @requires_dependencies(["wikipedia"], extras="wikipedia") - def update_source_metadata(self): - from wikipedia.exceptions import PageError - - try: - page = self.page - except PageError: - self.source_metadata = SourceMetadata( - exists=False, - ) - return - - self.source_metadata = SourceMetadata( - version=page.revision_id, - source_url=page.url, - exists=True, - ) - - @SourceConnectionError.wrap - @BaseSingleIngestDoc.skip_if_file_exists - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - self._create_full_tmp_dir_path() - self.update_source_metadata() - with open(self.filename, "w", encoding="utf8") as f: - f.write(self.text) - - -@dataclass -class WikipediaIngestHTMLDoc(WikipediaIngestDoc): - registry_name: str = "wikipedia_html" - - @property - def filename(self) -> Path: - return ( - Path(self.read_config.download_dir) / f"{self.get_filename_prefix()}.html" - ).resolve() - - @property - def text(self): - return self._get_html() - - @SourceConnectionNetworkError.wrap - def _get_html(self): - return self.page.html() - 
- @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.get_filename_prefix()}-html.json" - - -@dataclass -class WikipediaIngestTextDoc(WikipediaIngestDoc): - registry_name: str = "wikipedia_text" - - @property - def filename(self) -> Path: - return (Path(self.read_config.download_dir) / f"{self.get_filename_prefix()}.txt").resolve() - - @property - def text(self): - return self._get_content() - - @SourceConnectionNetworkError.wrap - def _get_content(self): - return self.page.content - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.get_filename_prefix()}-txt.json" - - -@dataclass -class WikipediaIngestSummaryDoc(WikipediaIngestDoc): - registry_name: str = "wikipedia_summary" - - @property - def filename(self) -> Path: - return ( - Path(self.read_config.download_dir) / f"{self.get_filename_prefix()}-summary.txt" - ).resolve() - - @property - def text(self): - return self._get_summary() - - @SourceConnectionNetworkError.wrap - def _get_summary(self): - return self.page.summary - - @property - def _output_filename(self): - return Path(self.processor_config.output_dir) / f"{self.get_filename_prefix()}-summary.json" - - -@dataclass -class WikipediaSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector): - connector_config: SimpleWikipediaConfig - - def initialize(self): - pass - - @requires_dependencies(["wikipedia"], extras="wikipedia") - def check_connection(self): - import wikipedia - - try: - wikipedia.page( - self.connector_config.page_title, - auto_suggest=self.connector_config.auto_suggest, - ) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def get_ingest_docs(self): - return [ - WikipediaIngestTextDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ), - WikipediaIngestHTMLDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ), - WikipediaIngestSummaryDoc( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ), - ] diff --git a/unstructured/ingest/enhanced_dataclass/__init__.py b/unstructured/ingest/enhanced_dataclass/__init__.py deleted file mode 100644 index 38c598c4a..000000000 --- a/unstructured/ingest/enhanced_dataclass/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .dataclasses import enhanced_field -from .json_mixin import EnhancedDataClassJsonMixin - -__all__ = ["enhanced_field", "EnhancedDataClassJsonMixin"] diff --git a/unstructured/ingest/enhanced_dataclass/core.py b/unstructured/ingest/enhanced_dataclass/core.py deleted file mode 100644 index 8fd79af39..000000000 --- a/unstructured/ingest/enhanced_dataclass/core.py +++ /dev/null @@ -1,99 +0,0 @@ -import _thread -import copy -import functools -from dataclasses import fields - -from dataclasses_json.core import ( - Collection, - Enum, - Mapping, - _encode_overrides, - _handle_undefined_parameters_safe, - _user_overrides_or_exts, - is_dataclass, -) - - -def _recursive_repr(user_function): - # Copied from dataclasses as this method isn't exposed for importing - repr_running = set() - - @functools.wraps(user_function) - def wrapper(self): - key = id(self), _thread.get_ident() - if key in repr_running: - return "..." 
- repr_running.add(key) - try: - result = user_function(self) - finally: - repr_running.discard(key) - return result - - return wrapper - - -def _asdict( - obj, - encode_json=False, - redact_sensitive=False, - redacted_text="***REDACTED***", - apply_name_overload: bool = True, -): - """ - A re-implementation of `asdict` (based on the original in the `dataclasses` - source) to support arbitrary Collection and Mapping types. - """ - if is_dataclass(obj): - result = [] - overrides = _user_overrides_or_exts(obj) - for field in fields(obj): - if overrides[field.name].encoder: - value = getattr(obj, field.name) - else: - value = _asdict( - getattr(obj, field.name), - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - apply_name_overload=apply_name_overload, - ) - if getattr(field, "sensitive", False) and redact_sensitive and value: - value = redacted_text - if getattr(field, "overload_name", None) and apply_name_overload: - overload_name = getattr(field, "overload_name") - result.append((overload_name, value)) - else: - result.append((field.name, value)) - - result = _handle_undefined_parameters_safe(cls=obj, kvs=dict(result), usage="to") - return _encode_overrides( - dict(result), _user_overrides_or_exts(obj), encode_json=encode_json - ) - elif isinstance(obj, Mapping): - return { - _asdict( - k, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - ): _asdict( - v, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - ) - for k, v in obj.items() - } - elif isinstance(obj, Collection) and not isinstance(obj, (str, bytes, Enum)): - return [ - _asdict( - v, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - ) - for v in obj - ] - else: - return copy.deepcopy(obj) diff --git a/unstructured/ingest/enhanced_dataclass/dataclasses.py b/unstructured/ingest/enhanced_dataclass/dataclasses.py deleted file mode 100644 index a58fb3b79..000000000 --- a/unstructured/ingest/enhanced_dataclass/dataclasses.py +++ /dev/null @@ -1,54 +0,0 @@ -import typing as t -from dataclasses import MISSING, Field - -from unstructured.ingest.enhanced_dataclass.core import _recursive_repr - - -class EnhancedField(Field): - def __init__(self, *args, sensitive=False, overload_name: t.Optional[str] = None): - super().__init__(*args) - self.sensitive = sensitive - self.overload_name = overload_name - - @_recursive_repr - def __repr__(self): - # Support for kw_only added in 3.10, to support as low as 3.8, need to dynamically map - fields_array = [ - f"name={self.name!r}", - f"type={self.type!r}", - f"default={self.default!r}", - f"default_factory={self.default_factory!r}", - f"init={self.init!r}", - f"repr={self.repr!r}", - f"hash={self.hash!r}", - f"compare={self.compare!r}", - f"metadata={self.metadata!r}", - f"sensitive={self.sensitive!r}", - f"overload_name={self.overload_name!r}", - f"_field_type={self._field_type}", - ] - if kw_only := getattr(self, "kw_only", None): - fields_array.append(f"kw_only={kw_only!r}") - return "Field({})".format(",".join(fields_array)) - - -def enhanced_field( - *, - default=MISSING, - default_factory=MISSING, - init: bool = True, - repr: bool = True, - hash=None, - compare: bool = True, - metadata=None, - kw_only=MISSING, - sensitive: bool = False, - overload_name: t.Optional[str] = None, -): - if default is not MISSING and default_factory is not MISSING: - raise ValueError("cannot specify both default and default_factory") - 
args = [default, default_factory, init, repr, hash, compare, metadata] - # Support for kw_only added in 3.10, to support as low as 3.8, need to dynamically map - if "kw_only" in EnhancedField.__slots__: - args.append(kw_only) - return EnhancedField(*args, sensitive=sensitive, overload_name=overload_name) diff --git a/unstructured/ingest/enhanced_dataclass/json_mixin.py b/unstructured/ingest/enhanced_dataclass/json_mixin.py deleted file mode 100644 index 04f365a6b..000000000 --- a/unstructured/ingest/enhanced_dataclass/json_mixin.py +++ /dev/null @@ -1,125 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import InitVar, fields -from typing import Any, Callable, Optional, Type, TypeVar, Union - -import dataclasses_json.core as dataclasses_json_core -from dataclasses_json import DataClassJsonMixin - -from unstructured.ingest.enhanced_dataclass.core import _asdict - -A = TypeVar("A", bound="EnhancedDataClassJsonMixin") - -# Monkey-patch _decode_dataclass class to support name override -og_decode_dataclass = dataclasses_json_core._decode_dataclass - - -def custom_decode_dataclass(cls, kvs, infer_missing): - dataclass_fields = fields(cls) - for f in [ - field - for field in dataclass_fields - if hasattr(field, "overload_name") and getattr(field, "overload_name", None) - ]: - field_name = f.name - overload_name = getattr(f, "overload_name") - if isinstance(kvs, dict) and overload_name in kvs: - kvs[field_name] = kvs.pop(overload_name) - return og_decode_dataclass(cls, kvs, infer_missing) - - -dataclasses_json_core._decode_dataclass = custom_decode_dataclass - - -class EnhancedDataClassJsonMixin(DataClassJsonMixin): - """A mixin class extending DataClassJsonMixin. - - This class extends the functionality of DataClassJsonMixin to provide enhanced functionality - for JSON serialization and deserialization. It introduces options for redacting sensitive - information, custom encoding, and more advanced schema handling. - - Attributes: - N/A (No additional attributes) - - Methods: - to_json: Serialize the object to JSON format with customizable options. - from_dict: Deserialize a dictionary into an object of this class. - to_dict: Convert the object to a dictionary with customizable options. - schema: Generate a schema for validating and parsing JSON data based on this class. 
- """ - - @classmethod - def check_init_var(cls): - ann = cls.__dict__.get("__annotations__", {}) - init_vars = {k: v for k, v in ann.items() if isinstance(v, InitVar)} - if init_vars: - raise TypeError( - "Class {} has the following fields defined with an InitVar which " - "cannot be used with EnhancedDataClassJsonMixin: {}".format( - cls.__name__, ", ".join(init_vars.keys()) - ) - ) - - def to_json( - self, - *, - skipkeys: bool = False, - ensure_ascii: bool = True, - check_circular: bool = True, - allow_nan: bool = True, - indent: Optional[Union[int, str]] = None, - separators: Optional[tuple[str, str]] = None, - default: Optional[Callable[..., Any]] = None, - sort_keys: bool = False, - redact_sensitive: bool = False, - redacted_text: str = "***REDACTED***", - apply_name_overload: bool = True, - **kw: Any, - ) -> str: - self.check_init_var() - return json.dumps( - self.to_dict( - encode_json=False, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - apply_name_overload=apply_name_overload, - ), - cls=dataclasses_json_core._ExtendedEncoder, - skipkeys=skipkeys, - ensure_ascii=ensure_ascii, - check_circular=check_circular, - allow_nan=allow_nan, - indent=indent, - separators=separators, - default=default, - sort_keys=sort_keys, - **kw, - ) - - @classmethod - def from_dict( - cls: Type[A], - kvs: dataclasses_json_core.Json, - *, - infer_missing=False, - apply_name_overload=False, - ) -> A: - cls.check_init_var() - return dataclasses_json_core._decode_dataclass(cls, kvs, infer_missing) - - def to_dict( - self, - encode_json: bool = False, - redact_sensitive: bool = False, - redacted_text: str = "***REDACTED***", - apply_name_overload: bool = True, - ) -> dict[str, dataclasses_json_core.Json]: - self.check_init_var() - return _asdict( - self, - encode_json=encode_json, - redact_sensitive=redact_sensitive, - redacted_text=redacted_text, - apply_name_overload=apply_name_overload, - ) diff --git a/unstructured/ingest/error.py b/unstructured/ingest/error.py deleted file mode 100644 index 8397caf6d..000000000 --- a/unstructured/ingest/error.py +++ /dev/null @@ -1,49 +0,0 @@ -from abc import ABC -from functools import wraps - - -class CustomError(Exception, ABC): - error_string: str - - @classmethod - def wrap(cls, f): - """ - Provides a wrapper for a function that catches any exception and - re-raises it as the customer error. If the exception itself is already an instance - of the custom error, re-raises original error. 
- """ - - @wraps(f) - def wrapper(*args, **kwargs): - try: - return f(*args, **kwargs) - except BaseException as error: - if not isinstance(error, cls) and not issubclass(type(error), cls): - raise cls(cls.error_string.format(str(error))) from error - raise - - return wrapper - - -class SourceConnectionError(CustomError): - error_string = "Error in getting data from upstream data source: {}" - - -class SourceConnectionNetworkError(SourceConnectionError): - error_string = "Error in connecting to upstream data source: {}" - - -class DestinationConnectionError(CustomError): - error_string = "Error in connecting to downstream data source: {}" - - -class EmbeddingEncoderConnectionError(CustomError): - error_string = "Error in connecting to the embedding model provider: {}" - - -class WriteError(CustomError): - error_string = "Error in writing to downstream data source: {}" - - -class PartitionError(CustomError): - error_string = "Error in partitioning content: {}" diff --git a/unstructured/ingest/evaluate.py b/unstructured/ingest/evaluate.py deleted file mode 100755 index c6446ac9d..000000000 --- a/unstructured/ingest/evaluate.py +++ /dev/null @@ -1,349 +0,0 @@ -#! /usr/bin/env python3 - -from typing import List, Optional, Tuple, Union - -import click - -from unstructured.metrics.evaluate import ( - ElementTypeMetricsCalculator, - ObjectDetectionAggregatedMetricsCalculator, - ObjectDetectionPerClassMetricsCalculator, - TableStructureMetricsCalculator, - TextExtractionMetricsCalculator, - filter_metrics, - get_mean_grouping, -) - - -@click.group() -def main(): - pass - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help="Optional: list of selected structured output file names under the \ - directory to be evaluate. If none, all files under directory will be use.", -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be use.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option("--group_by", type=str, help="Input field for aggregration, or leave blank if none.") -@click.option( - "--weights", - type=(int, int, int), - default=(2, 1, 1), - show_default=True, - help="A list of weights to the Levenshtein distance calculation. 
Takes input as --weights 2 2 2\ - See text_extraction.py/calculate_edit_distance for more details.", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -@click.option( - "--output_type", - type=str, - default="json", - show_default=True, - help="Takes in either `txt` or `json` as output_type.", -) -def measure_text_extraction_accuracy_command( - output_dir: str, - source_dir: str, - export_dir: str, - weights: Tuple[int, int, int], - visualize: bool, - output_type: str, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, - group_by: Optional[str] = None, -): - return ( - TextExtractionMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - group_by=group_by, - weights=weights, - document_type=output_type, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to structured source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help="Optional: list of selected structured output file names under the \ - directory to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -def measure_element_type_accuracy_command( - output_dir: str, - source_dir: str, - export_dir: str, - visualize: bool, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, -): - return ( - ElementTypeMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - - -@main.command() -@click.option( - "--group_by", - type=str, - required=True, - help="The category to group by; valid values are 'doctype' and 'connector'.", -) -@click.option( - "--data_input", - type=str, - required=True, - help="A datafram or path to the CSV/TSV file containing the data", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option( - "--eval_name", - type=str, - help="Evaluated metric. Expecting one of 'text_extraction' or 'element_type'", -) -@click.option( - "--agg_name", - type=str, - help="String to use with export filename. Default is `cct` for `text_extraction` \ - and `element-type` for `element_type`", -) -@click.option( - "--export_filename", type=str, help="Optional. Define your file name for the output here." 
-) -def get_mean_grouping_command( - group_by: str, - data_input: str, - export_dir: str, - eval_name: str, - agg_name: Optional[str] = None, - export_filename: Optional[str] = None, -): - return get_mean_grouping( - group_by=group_by, - data_input=data_input, - export_dir=export_dir, - eval_name=eval_name, - agg_name=agg_name, - export_filename=export_filename, - ) - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to structured source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help="Optional: list of selected structured output file names under the \ - directory to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. Default to \ - your/working/dir/metrics/", -) -@click.option( - "--visualize", - is_flag=True, - show_default=True, - default=False, - help="Add the flag to show progress bar.", -) -@click.option( - "--cutoff", - type=float, - show_default=True, - default=0.8, - help="The cutoff value for the element level alignment. \ - If not set, a default value is used", -) -def measure_table_structure_accuracy_command( - output_dir: str, - source_dir: str, - export_dir: str, - visualize: bool, - output_list: Optional[List[str]] = None, - source_list: Optional[List[str]] = None, - cutoff: Optional[float] = None, -): - return ( - TableStructureMetricsCalculator( - documents_dir=output_dir, - ground_truths_dir=source_dir, - cutoff=cutoff, - ) - .on_files(document_paths=output_list, ground_truth_paths=source_list) - .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True) - ) - - -@main.command() -@click.option("--output_dir", type=str, help="Directory to structured output.") -@click.option("--source_dir", type=str, help="Directory to structured source.") -@click.option( - "--output_list", - type=str, - multiple=True, - help=( - "Optional: list of selected structured output file names under the " - "directory to be evaluated. If none, all files under directory will be used." - ), -) -@click.option( - "--source_list", - type=str, - multiple=True, - help="Optional: list of selected source file names under the directory \ - to be evaluate. If none, all files under directory will be used.", -) -@click.option( - "--export_dir", - type=str, - default="metrics", - help="Directory to save the output evaluation metrics to. 
Defaults to \
-    your/working/dir/metrics/",
-)
-@click.option(
-    "--visualize",
-    is_flag=True,
-    show_default=True,
-    default=False,
-    help="Add the flag to show a progress bar.",
-)
-def measure_object_detection_metrics_command(
-    output_dir: str,
-    source_dir: str,
-    export_dir: str,
-    visualize: bool,
-    output_list: Optional[List[str]] = None,
-    source_list: Optional[List[str]] = None,
-):
-    aggregated_df = (
-        ObjectDetectionAggregatedMetricsCalculator(
-            documents_dir=output_dir,
-            ground_truths_dir=source_dir,
-        )
-        .on_files(document_paths=output_list, ground_truth_paths=source_list)
-        .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
-    )
-    per_class_df = (
-        ObjectDetectionPerClassMetricsCalculator(
-            documents_dir=output_dir,
-            ground_truths_dir=source_dir,
-        )
-        .on_files(document_paths=output_list, ground_truth_paths=source_list)
-        .calculate(export_dir=export_dir, visualize_progress=visualize, display_agg_df=True)
-    )
-    return aggregated_df, per_class_df
-
-
-@main.command()
-@click.option(
-    "--data_input", type=str, required=True, help="Path to the data file (.tsv, .csv, or .txt)."
-)
-@click.option(
-    "--filter_list",
-    type=str,
-    required=True,
-    help="List of strings used to filter the data_input.",
-)
-@click.option(
-    "--filter_by",
-    type=str,
-    required=True,
-    help="Field from data_input to match against filter_list. Default is `filename`.",
-)
-@click.option(
-    "--export_filename", type=str, help="Export filename. Required when return_type is `file`."
-)
-@click.option("--export_dir", type=str, help="Export directory.")
-@click.option("--return_type", type=str, help="`dataframe` or `file`. Default is `file`.")
-def filter_metrics_command(
-    data_input: str,
-    filter_list: Union[str, List[str]],
-    filter_by: str = "filename",
-    export_filename: Optional[str] = None,
-    export_dir: str = "metrics",
-    return_type: str = "file",
-):
-    return filter_metrics(
-        data_input, filter_list, filter_by, export_filename, export_dir, return_type
-    )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png b/unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png
deleted file mode 100644
index cf2c94f4779e6791e3a76e0d92bc28b30c3e0148..0000000000000000000000000000000000000000
Binary files a/unstructured/ingest/img/unstructured_ingest_cli_pipeline_diagram.png and /dev/null differ
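For reference, the click group in the (now removed) evaluate.py above was driven with dash-separated command names, since click 7+ derives a command's name from its function name with underscores replaced by dashes. A hedged sketch of invoking the text-extraction metric through click's test runner, assuming the pre-PR package layout and placeholder directories:

    from click.testing import CliRunner

    # Assumes the module as it existed before this PR; paths are placeholders.
    from unstructured.ingest.evaluate import main

    runner = CliRunner()
    result = runner.invoke(
        main,
        [
            "measure-text-extraction-accuracy-command",
            "--output_dir", "structured-output/",
            "--source_dir", "gold-standard/",
            "--export_dir", "metrics/",
        ],
    )
    print(result.output)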
zUOZbw=(!cLPeLvD%si(eMmL%2=ecVb9V*kSK%z2%iw0IDR5#mQlC5T+uldi&(cXCn z6Ve~hT~q1F9bO!y{x*mSh#adXU6Q?3r>b)w3r{K?dC>*%b|zoT&fnPr4_x!4r5rXT z@$_=uV6bYXbhY8-o zSI>PaosY744EimId&J{PXNP7n;%08i0{^nZNPc_rfFwwbppU820yEF zXY!aeFwp?uKhw}EB~!V6=JjoTKcBtP2_>ACpTF_ zGYQb#n_6UrnIAYMllxz(>fm->H8eXBQ6YKx6FOqDS!shQg{i5jFO1_`tw)H+SNV+sJjuxC_*^Z~aKjR|9a3qB#Jw| z#aG}Pst-2+jA54NM%~$PZvHl$+#s>hH_8#Z`xK%_{iPiIw9UN=+dUlNb4dpsU8%Q6 zb$l+(SfENx(5ec=E= logging.INFO: - msg = f"Attempting %s(...), will retry for {s} given these issues: %s" - log_args = [details["target"].__name__] + s_args + [exception_s] - else: - msg = f"Attempting %s(%s), will retry for {s} given these issues: %s" - target_input_list = [] - if args := details.get("args"): - target_input_list.extend([str(d) for d in args]) - if kwargs := details.get("kwargs"): - target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()]) - target_input = ", ".join(target_input_list) if target_input_list else "" - log_args = ( - [ - details["target"].__name__, - target_input, - ] - + s_args - + [exception_s] - ) - logger.log(log_level, msg, *log_args) - - -# Default backoff handler -def _log_backoff(details, logger, log_level): - if log_level >= logging.INFO: - msg = "Backing off %s(...) for %.1fs (%s)" - log_args = [details["target"].__name__, details["tries"]] - else: - msg = "Backing off %.1fs seconds after %d tries calling function %s(%s) -> %s" - target_input_list = [] - if args := details.get("args"): - target_input_list.extend([str(d) for d in args]) - if kwargs := details.get("kwargs"): - target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()]) - target_input = ", ".join(target_input_list) if target_input_list else "" - log_args = [ - details["wait"], - details["tries"], - details["target"].__name__, - target_input, - ] - exc_typ, exc, _ = sys.exc_info() - if exc is not None: - exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1] - log_args.append(exc_fmt.rstrip("\n")) - else: - log_args.append(str(details["value"])) - logger.log(log_level, msg, *log_args) - - -# Default giveup handler -def _log_giveup(details, logger, log_level): - if log_level >= logging.INFO: - msg = "Giving up %s(...) after %.1fs (%s)" - log_args = [details["target"].__name__, details["tries"]] - else: - msg = "Giving up after %d tries (%.1fs) calling function %s(%s) -> %s" - target_input_list = [] - if args := details.get("args"): - target_input_list.extend([str(d) for d in args]) - if kwargs := details.get("kwargs"): - target_input_list.extend([f"{k}={str(v)}" for k, v in kwargs.items()]) - target_input = ", ".join(target_input_list) if target_input_list else "..." 
-        log_args = [
-            details["tries"],
-            details["wait"],
-            details["target"].__name__,
-            target_input,
-        ]
-
-    exc_typ, exc, _ = sys.exc_info()
-    if exc is not None:
-        exc_fmt = traceback.format_exception_only(exc_typ, exc)[-1]
-        log_args.append(exc_fmt.rstrip("\n"))
-    else:
-        log_args.append(details["value"])
-
-    logger.log(log_level, msg, *log_args)
diff --git a/unstructured/ingest/ingest_backoff/_wrapper.py b/unstructured/ingest/ingest_backoff/_wrapper.py
deleted file mode 100644
index 66e9d193a..000000000
--- a/unstructured/ingest/ingest_backoff/_wrapper.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# coding:utf-8
-import logging
-from collections.abc import Iterable as IterableType
-from typing import Any, Iterable, Optional, Type, Union
-
-from backoff import _sync
-from backoff._common import _config_handlers, _prepare_logger
-from backoff._jitter import full_jitter
-from backoff._typing import (
-    _Handler,
-    _Jitterer,
-    _MaybeCallable,
-    _MaybeLogger,
-    _MaybeSequence,
-    _Predicate,
-    _WaitGenerator,
-)
-
-from unstructured.ingest.ingest_backoff._common import _log_backoff, _log_giveup, _log_start
-
-
-class RetryHandler:
-    def __init__(
-        self,
-        wait_gen: _WaitGenerator,
-        exception: _MaybeSequence[Type[Exception]],
-        *,
-        max_tries: Optional[_MaybeCallable[int]] = None,
-        max_time: Optional[_MaybeCallable[float]] = None,
-        jitter: Union[_Jitterer, None] = full_jitter,
-        giveup: _Predicate[Exception] = lambda e: False,
-        on_start: Union[_Handler, Iterable[_Handler], None] = None,
-        on_success: Union[_Handler, Iterable[_Handler], None] = None,
-        on_backoff: Union[_Handler, Iterable[_Handler], None] = None,
-        on_giveup: Union[_Handler, Iterable[_Handler], None] = None,
-        raise_on_giveup: bool = True,
-        logger: _MaybeLogger = "backoff",
-        start_log_level: int = logging.INFO,
-        backoff_log_level: int = logging.INFO,
-        giveup_log_level: int = logging.ERROR,
-        **wait_gen_kwargs: Any,
-    ):
-        prepared_logger = _prepare_logger(logger)
-        on_success = _config_handlers(on_success)
-        on_start = _config_handlers(
-            on_start,
-            default_handler=_log_start,
-            logger=prepared_logger,
-            log_level=start_log_level,
-        )
-        on_backoff = _config_handlers(
-            on_backoff,
-            default_handler=_log_backoff,
-            logger=prepared_logger,
-            log_level=backoff_log_level,
-        )
-        on_giveup = _config_handlers(
-            on_giveup,
-            default_handler=_log_giveup,
-            logger=prepared_logger,
-            log_level=giveup_log_level,
-        )
-        prepared_logger.debug(
-            "Initiating retry handler with "
-            "max_tries={}, "
-            "max_time={}, "
-            "exception={}, "
-            "start_log_level={}, "
-            "backoff_log_level={}, "
-            "giveup_log_level={}".format(
-                max_tries,
-                max_time,
-                (
-                    ", ".join([e.__name__ for e in exception])
-                    if isinstance(exception, IterableType)
-                    else exception.__name__
-                ),
-                logging.getLevelName(start_log_level),
-                logging.getLevelName(backoff_log_level),
-                logging.getLevelName(giveup_log_level),
-            ),
-        )
-        self.on_start = on_start
-        self.on_success = on_success
-        self.on_backoff = on_backoff
-        self.on_giveup = on_giveup
-        self.jitter = jitter
-        self.giveup = giveup
-        self.raise_on_giveup = raise_on_giveup
-        self.wait_gen_kwargs = wait_gen_kwargs
-        self.wait_gen = wait_gen
-        self.exception = exception
-        self.max_tries = max_tries
-        self.max_time = max_time
-
-    def __call__(self, target, *args, **kwargs):
-        _sync._call_handlers(
-            self.on_start,
-            target=target,
-            args=args,
-            kwargs=kwargs,
-            tries=None,
-            elapsed=None,
-            max_tries=self.max_tries,
-            max_time=self.max_time,
-            exception=self.exception,
-        )
-        wrapped_func = _sync.retry_exception(
-            target,
self.wait_gen, - self.exception, - max_tries=self.max_tries, - max_time=self.max_time, - jitter=self.jitter, - giveup=self.giveup, - on_success=self.on_success, - on_backoff=self.on_backoff, - on_giveup=self.on_giveup, - raise_on_giveup=self.raise_on_giveup, - wait_gen_kwargs=self.wait_gen_kwargs, - ) - return wrapped_func(*args, **kwargs) diff --git a/unstructured/ingest/interfaces.py b/unstructured/ingest/interfaces.py deleted file mode 100644 index 95edd13b1..000000000 --- a/unstructured/ingest/interfaces.py +++ /dev/null @@ -1,845 +0,0 @@ -"""Defines Abstract Base Classes (ABC's) core to batch processing documents -through Unstructured.""" - -from __future__ import annotations - -import functools -import json -import os -import re -from abc import ABC, abstractmethod -from dataclasses import InitVar, dataclass, field -from datetime import datetime -from pathlib import Path -from typing import Any, Optional, Type, TypeVar - -from dataclasses_json import DataClassJsonMixin -from dataclasses_json.core import Json, _decode_dataclass - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.embed.interfaces import BaseEmbeddingEncoder, Element -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.enhanced_dataclass.core import _asdict -from unstructured.ingest.error import PartitionError, SourceConnectionError -from unstructured.ingest.logger import logger -from unstructured.partition.api import partition_via_api -from unstructured.staging.base import elements_to_dicts, flatten_dict - -A = TypeVar("A", bound="DataClassJsonMixin") - -# -- Needed to resolve TypeError raised by using InitVar and __future__.annotations -# -- See more here: https://stackoverflow.com/questions/70400639/ -InitVar.__call__ = lambda *args: None # type: ignore - -SUPPORTED_REMOTE_FSSPEC_PROTOCOLS = [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", - "sftp", -] - - -@dataclass -class BaseSessionHandle(ABC): - """Abstract Base Class for sharing resources that are local to an individual process. - e.g., a connection for making a request for fetching documents.""" - - -@dataclass -class BaseConfig(EnhancedDataClassJsonMixin, ABC): - pass - - -@dataclass -class AccessConfig(BaseConfig): - """Meant to designate holding any sensitive information associated with other configs - and also for access specific configs.""" - - -@dataclass -class RetryStrategyConfig(BaseConfig): - """ - Contains all info needed for decorator to pull from `self` for backoff - and retry triggered by exception. - - Args: - max_retries: The maximum number of attempts to make before giving - up. Once exhausted, the exception will be allowed to escape. - The default value of None means there is no limit to the - number of tries. If a callable is passed, it will be - evaluated at runtime and its return value used. - max_retry_time: The maximum total amount of time to try for before - giving up. Once expired, the exception will be allowed to - escape. If a callable is passed, it will be - evaluated at runtime and its return value used. 
- """ - - max_retries: Optional[int] = None - max_retry_time: Optional[float] = None - - -@dataclass -class PartitionConfig(BaseConfig): - # where to write structured data outputs - pdf_infer_table_structure: bool = False - strategy: str = "auto" - ocr_languages: Optional[list[str]] = None - encoding: Optional[str] = None - additional_partition_args: dict[str, Any] = field(default_factory=dict) - skip_infer_table_types: Optional[list[str]] = None - fields_include: list[str] = field( - default_factory=lambda: ["element_id", "text", "type", "metadata", "embeddings"], - ) - flatten_metadata: bool = False - metadata_exclude: list[str] = field(default_factory=list) - metadata_include: list[str] = field(default_factory=list) - partition_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" - partition_by_api: bool = False - api_key: Optional[str] = str(enhanced_field(default=None, sensitive=True)) or None - hi_res_model_name: Optional[str] = None - - -@dataclass -class ProcessorConfig(BaseConfig): - reprocess: bool = False - verbose: bool = False - work_dir: str = str((Path.home() / ".cache" / "unstructured" / "ingest" / "pipeline").resolve()) - output_dir: str = "structured-output" - num_processes: int = 2 - raise_on_error: bool = False - - -@dataclass -class FileStorageConfig(BaseConfig): - remote_url: str - uncompress: bool = False - recursive: bool = False - file_glob: Optional[list[str]] = None - - -@dataclass -class FsspecConfig(FileStorageConfig): - access_config: Optional[AccessConfig] = None - protocol: str = field(init=False) - path_without_protocol: str = field(init=False) - dir_path: str = field(init=False) - file_path: str = field(init=False) - - def get_access_config(self) -> dict[str, Any]: - if self.access_config: - return self.access_config.to_dict(apply_name_overload=False) - else: - return {} - - def __post_init__(self): - self.protocol, self.path_without_protocol = self.remote_url.split("://") - if self.protocol not in SUPPORTED_REMOTE_FSSPEC_PROTOCOLS: - raise ValueError( - f"Protocol {self.protocol} not supported yet, only " - f"{SUPPORTED_REMOTE_FSSPEC_PROTOCOLS} are supported.", - ) - - # dropbox root is an empty string - match = re.match(rf"{self.protocol}://([\s])/", self.remote_url) - if match and self.protocol == "dropbox": - self.dir_path = " " - self.file_path = "" - return - - # dropbox paths can start with slash - match = re.match(rf"{self.protocol}:///([^/\s]+?)/([^\s]*)", self.remote_url) - if match and self.protocol == "dropbox": - self.dir_path = match.group(1) - self.file_path = match.group(2) or "" - return - - # just a path with no trailing prefix - match = re.match(rf"{self.protocol}://([^/\s]+?)(/*)$", self.remote_url) - if match: - self.dir_path = match.group(1) - self.file_path = "" - return - - # valid path with a dir and/or file - match = re.match(rf"{self.protocol}://([^/\s]+?)/([^\s]*)", self.remote_url) - if not match: - raise ValueError( - f"Invalid path {self.remote_url}. 
" - f"Expected :///.", - ) - self.dir_path = match.group(1) - self.file_path = match.group(2) or "" - - -@dataclass -class ReadConfig(BaseConfig): - # where raw documents are stored for processing, and then removed if not preserve_downloads - download_dir: Optional[str] = "" - re_download: bool = False - preserve_downloads: bool = False - download_only: bool = False - max_docs: Optional[int] = None - - -@dataclass -class EmbeddingConfig(BaseConfig): - provider: str - api_key: Optional[str] = str(enhanced_field(default=None, sensitive=True)) or None - model_name: Optional[str] = None - aws_access_key_id: Optional[str] = None - aws_secret_access_key: Optional[str] = None - aws_region: Optional[str] = None - - def get_embedder(self) -> BaseEmbeddingEncoder: - kwargs: dict[str, Any] = {} - if self.api_key: - kwargs["api_key"] = self.api_key - if self.model_name: - kwargs["model_name"] = self.model_name - # TODO make this more dynamic to map to encoder configs - if self.provider == "langchain-openai": - from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder - - return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs)) - elif self.provider == "langchain-huggingface": - from unstructured.embed.huggingface import ( - HuggingFaceEmbeddingConfig, - HuggingFaceEmbeddingEncoder, - ) - - return HuggingFaceEmbeddingEncoder(config=HuggingFaceEmbeddingConfig(**kwargs)) - elif self.provider == "octoai": - from unstructured.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder - - return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs)) - elif self.provider == "langchain-aws-bedrock": - from unstructured.embed.bedrock import BedrockEmbeddingConfig, BedrockEmbeddingEncoder - - return BedrockEmbeddingEncoder( - config=BedrockEmbeddingConfig( - aws_access_key_id=self.aws_access_key_id, - aws_secret_access_key=self.aws_secret_access_key, - region_name=self.aws_region, - ) - ) - elif self.provider == "langchain-vertexai": - from unstructured.embed.vertexai import ( - VertexAIEmbeddingConfig, - VertexAIEmbeddingEncoder, - ) - - return VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(**kwargs)) - elif self.provider == "langchain-voyageai": - from unstructured.embed.voyageai import ( - VoyageAIEmbeddingConfig, - VoyageAIEmbeddingEncoder, - ) - - return VoyageAIEmbeddingEncoder(config=VoyageAIEmbeddingConfig(**kwargs)) - elif self.provider == "mixedbread-ai": - from unstructured.embed.mixedbreadai import ( - MixedbreadAIEmbeddingConfig, - MixedbreadAIEmbeddingEncoder, - ) - - return MixedbreadAIEmbeddingEncoder(config=MixedbreadAIEmbeddingConfig(**kwargs)) - else: - raise ValueError(f"{self.provider} not a recognized encoder") - - -@dataclass -class ChunkingConfig(BaseConfig): - chunk_elements: InitVar[bool] = False - chunking_strategy: Optional[str] = None - combine_text_under_n_chars: Optional[int] = None - include_orig_elements: Optional[bool] = None - max_characters: Optional[int] = None - multipage_sections: Optional[bool] = None - new_after_n_chars: Optional[int] = None - overlap: Optional[int] = None - overlap_all: Optional[bool] = None - - def __post_init__(self, chunk_elements: bool) -> None: - """Resolve chunking_strategy if chunk_elements is True. - - If chunk_elements is True and chunking_strategy is None, default to 'by_title'. Otherwise, - do nothing and keep the defined value of chunking_strategy." 
- """ - if chunk_elements and self.chunking_strategy is None: - self.chunking_strategy = "by_title" - - -@dataclass -class PermissionsConfig(BaseConfig): - application_id: Optional[str] = enhanced_field(overload_name="permissions_application_id") - tenant: Optional[str] = enhanced_field(overload_name="permissions_tenant") - client_cred: Optional[str] = enhanced_field( - default=None, sensitive=True, overload_name="permissions_client_cred" - ) - - -# module-level variable to store session handle -global_write_session_handle: Optional[BaseSessionHandle] = None - - -@dataclass -class WriteConfig(BaseConfig): - pass - - -@dataclass -class BaseConnectorConfig(BaseConfig, ABC): - """Abstract definition on which to define connector-specific attributes.""" - - -@dataclass -class SourceMetadata(EnhancedDataClassJsonMixin, ABC): - date_created: Optional[str] = None - date_modified: Optional[str] = None - version: Optional[str] = None - source_url: Optional[str] = None - exists: Optional[bool] = None - permissions_data: Optional[list[dict[str, Any]]] = None - - -class IngestDocJsonMixin(EnhancedDataClassJsonMixin): - """ - Inherently, DataClassJsonMixin does not add in any @property fields to the json/dict - created from the dataclass. This explicitly sets properties to look for on the IngestDoc - class when creating the json/dict for serialization purposes. - """ - - metadata_properties = [ - "date_created", - "date_modified", - "date_processed", - "exists", - "permissions_data", - "version", - "source_url", - ] - properties_to_serialize = [ - "base_filename", - "filename", - "_output_filename", - "record_locator", - "_source_metadata", - "unique_id", - ] - - def add_props(self, as_dict: dict[str, Any], props: list[str]): - for prop in props: - val = getattr(self, prop) - if isinstance(val, Path): - val = str(val) - if isinstance(val, DataClassJsonMixin): - val = val.to_dict(encode_json=False) - as_dict[prop] = val - - def to_dict(self, **kwargs) -> dict[str, Json]: - as_dict = _asdict(self, **kwargs) - if "_session_handle" in as_dict: - as_dict.pop("_session_handle", None) - self.add_props(as_dict=as_dict, props=self.properties_to_serialize) - if getattr(self, "_source_metadata") is not None: - self.add_props(as_dict=as_dict, props=self.metadata_properties) - return as_dict - - @classmethod - def from_dict( - cls: Type[A], kvs: Json, *, infer_missing=False, apply_name_overload: bool = True - ) -> A: - doc = super().from_dict( - kvs=kvs, infer_missing=infer_missing, apply_name_overload=apply_name_overload - ) - if meta := kvs.get("_source_metadata"): - setattr(doc, "_source_metadata", SourceMetadata.from_dict(meta)) - if date_processed := kvs.get("_date_processed"): - setattr(doc, "_date_processed", date_processed) - return doc - - -class BatchIngestDocJsonMixin(EnhancedDataClassJsonMixin): - """ - Inherently, DataClassJsonMixin does not add in any @property fields to the json/dict - created from the dataclass. This explicitly sets properties to look for on the IngestDoc - class when creating the json/dict for serialization purposes. 
- """ - - properties_to_serialize = ["unique_id"] - - def add_props(self, as_dict: dict[str, Any], props: list[str]): - for prop in props: - val = getattr(self, prop) - if isinstance(val, Path): - val = str(val) - if isinstance(val, DataClassJsonMixin): - val = val.to_dict(encode_json=False) - as_dict[prop] = val - - def to_dict(self, encode_json=False) -> dict[str, Json]: - as_dict = _asdict(self, encode_json=encode_json) - self.add_props(as_dict=as_dict, props=self.properties_to_serialize) - return as_dict - - @classmethod - def from_dict(cls: Type[A], kvs: Json, *, infer_missing=False) -> A: - doc = _decode_dataclass(cls, kvs, infer_missing) - return doc - - -@dataclass -class BaseIngestDoc(ABC): - processor_config: ProcessorConfig - read_config: ReadConfig - connector_config: BaseConnectorConfig - - @property - @abstractmethod - def unique_id(self) -> str: - pass - - -@dataclass -class BaseSingleIngestDoc(BaseIngestDoc, IngestDocJsonMixin, ABC): - """An "ingest document" is specific to a connector, and provides - methods to fetch a single raw document, store it locally for processing, any cleanup - needed after successful processing of the doc, and the ability to write the doc's - structured outputs once processed. - - Crucially, it is not responsible for the actual processing of the raw document. - """ - - _source_metadata: Optional[SourceMetadata] = field(init=False, default=None) - _date_processed: Optional[str] = field(init=False, default=None) - - @property - def source_metadata(self) -> SourceMetadata: - if self._source_metadata is None: - self.update_source_metadata() - # Provide guarantee that the field was set by update_source_metadata() - if self._source_metadata is None: - raise ValueError("failed to set source metadata") - return self._source_metadata - - @source_metadata.setter - def source_metadata(self, value: SourceMetadata): - self._source_metadata = value - - @property - def date_created(self) -> Optional[str]: - """The date the document was created on the source system.""" - return self.source_metadata.date_created - - @property - def date_modified(self) -> Optional[str]: - """The date the document was last modified on the source system.""" - return self.source_metadata.date_modified - - @property - def date_processed(self) -> Optional[str]: - """The date the document was last processed by Unstructured. 
- self._date_processed is assigned internally in self.partition_file()""" - return self._date_processed - - @property - def exists(self) -> Optional[bool]: - """Whether the document exists on the remote source.""" - return self.source_metadata.exists - - @property - @abstractmethod - def filename(self): - """The local filename of the document after fetching from remote source.""" - - @property - def base_filename(self) -> Optional[str]: - if self.read_config.download_dir and self.filename: - download_path = str(Path(self.read_config.download_dir).resolve()) - full_path = str(self.filename) - base_path = full_path.replace(download_path, "") - return base_path - return None - - @property - def base_output_filename(self) -> Optional[str]: - if self.processor_config.output_dir and self._output_filename: - output_path = str(Path(self.processor_config.output_dir).resolve()) - full_path = str(self._output_filename) - base_path = full_path.replace(output_path, "") - return base_path - return None - - @property - @abstractmethod - def _output_filename(self): - """Filename of the structured output for this doc.""" - - @property - def record_locator(self) -> Optional[dict[str, Any]]: # Values must be JSON-serializable - """A dictionary with any data necessary to uniquely identify the document on - the source system.""" - return None - - @property - def unique_id(self) -> str: - return self.filename - - @property - def source_url(self) -> Optional[str]: - """The URL of the source document.""" - return self.source_metadata.source_url # type: ignore - - @property - def version(self) -> Optional[str]: - """The version of the source document; this could be the last modified date, an - explicit version number, or anything else that can be used to uniquely identify - the version of the document.""" - return self.source_metadata.version # type: ignore - - @property - def permissions_data(self) -> Optional[list[dict[str, Any]]]: - """Access control data, aka permissions or sharing, from the source system.""" - if self.source_metadata is None: - self.update_source_metadata() - return self.source_metadata.permissions_data # type: ignore - - @abstractmethod - def cleanup_file(self): - """Removes the local copy of the file (or anything else) after successful processing.""" - - @staticmethod - def skip_if_file_exists(func): - """Decorator that checks if a file exists, is not empty, and should not be re-downloaded; - if so, logs a message indicating as much and skips the decorated function.""" - - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - if ( - not self.read_config.re_download - and self.filename.is_file() - and self.filename.stat().st_size - ): - logger.debug(f"File exists: {self.filename}, skipping {func.__name__}") - return None - return func(self, *args, **kwargs) - - return wrapper - - # TODO: set as @abstractmethod and pass or raise NotImplementedError - def update_source_metadata(self, **kwargs) -> None: - """Sets the SourceMetadata and the properties for the doc.""" - self._source_metadata = SourceMetadata() - - def update_permissions_data(self): - """Sets the _permissions_data property for the doc.
- This property is later used to fill the corresponding SourceMetadata.permissions_data field, - and after that carries on to the permissions_data property.""" - self._permissions_data: Optional[list[dict[str, Any]]] = None - - # NOTE(crag): Future BaseIngestDoc classes could define get_file_object() methods - # in addition to or instead of get_file() - @abstractmethod - @SourceConnectionError.wrap - def get_file(self): - """Fetches the "remote" doc and stores it locally on the filesystem.""" - - def has_output(self) -> bool: - """Determine if structured output for this doc already exists.""" - return self._output_filename.is_file() and self._output_filename.stat().st_size - - @PartitionError.wrap - def partition_file( - self, - partition_config: PartitionConfig, - **partition_kwargs, - ) -> list[Element]: - from unstructured.partition.auto import partition - - if not partition_config.partition_by_api: - logger.debug("Using local partition") - elements = partition( - filename=str(self.filename), - data_source_metadata=DataSourceMetadata( - url=self.source_url, - version=self.version, - record_locator=self.record_locator, - date_created=self.date_created, - date_modified=self.date_modified, - date_processed=self.date_processed, - permissions_data=self.permissions_data, - ), - **partition_kwargs, - ) - else: - endpoint = partition_config.partition_endpoint - - logger.debug(f"Using remote partition ({endpoint})") - - elements = partition_via_api( - filename=str(self.filename), - api_key=partition_config.api_key, - api_url=endpoint, - **partition_kwargs, - ) - # TODO: add m_data_source_metadata to unstructured-api pipeline_api and then - # pass the stringified json here - return elements - - def process_file( - self, - partition_config: PartitionConfig, - **partition_kwargs, - ) -> Optional[list[dict[str, Any]]]: - self._date_processed = datetime.utcnow().isoformat() - if self.read_config.download_only: - return None - logger.info(f"Processing {self.filename}") - - elements = self.partition_file(partition_config=partition_config, **partition_kwargs) - element_dicts = elements_to_dicts(elements) - - self.isd_elems_no_filename: list[dict[str, Any]] = [] - for elem in element_dicts: - if partition_config.metadata_exclude and partition_config.metadata_include: - raise ValueError( - "Arguments `--metadata-include` and `--metadata-exclude` are " - "mutually exclusive with each other.", - ) - elif partition_config.metadata_exclude: - ex_list = partition_config.metadata_exclude - for ex in ex_list: - if "." 
in ex: # handle nested fields - nested_fields = ex.split(".") - current_elem = elem - for f in nested_fields[:-1]: - if f in current_elem: - current_elem = current_elem[f] - field_to_exclude = nested_fields[-1] - if field_to_exclude in current_elem: - current_elem.pop(field_to_exclude, None) - else: # handle top-level fields - elem["metadata"].pop(ex, None) # type: ignore[attr-defined] - elif partition_config.metadata_include: - in_list = partition_config.metadata_include - for k in list(elem["metadata"].keys()): # type: ignore[attr-defined] - if k not in in_list: - elem["metadata"].pop(k, None) # type: ignore[attr-defined] - in_list = partition_config.fields_include - elem = {k: v for k, v in elem.items() if k in in_list} - - if partition_config.flatten_metadata and "metadata" in elem: - metadata = elem.pop("metadata") - elem.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"])) - - self.isd_elems_no_filename.append(elem) - - return self.isd_elems_no_filename - - -@dataclass -class BaseIngestDocBatch(BaseIngestDoc, BatchIngestDocJsonMixin, ABC): - ingest_docs: list[BaseSingleIngestDoc] = field(default_factory=list) - - @abstractmethod - @SourceConnectionError.wrap - def get_files(self): - """Fetches the "remote" docs and stores them locally on the filesystem.""" - - -@dataclass -class BaseConnector(EnhancedDataClassJsonMixin, ABC): - @abstractmethod - def check_connection(self): - pass - - -@dataclass -class BaseSourceConnector(BaseConnector, ABC): - """Abstract Base Class for a connector to a remote source, e.g. S3 or Google Drive.""" - - processor_config: ProcessorConfig - read_config: ReadConfig - connector_config: BaseConnectorConfig - - @abstractmethod - def cleanup(self, cur_dir=None): - """Any additional cleanup needed after processing is complete. E.g., removing - temporary download dirs that are empty. - - By convention, documents that failed to process are typically not cleaned up.""" - - @abstractmethod - def initialize(self): - """Initializes the connector. Should also validate the connector is properly - configured: e.g., list a single document from the source.""" - - @abstractmethod - def get_ingest_docs(self): - """Returns all ingest docs (derived from BaseIngestDoc). - This does not imply downloading all the raw documents themselves; - rather, each IngestDoc is capable of fetching its content (in another process) - with IngestDoc.get_file().""" - - -@dataclass -class BaseDestinationConnector(BaseConnector, ABC): - write_config: WriteConfig - connector_config: BaseConnectorConfig - - def __init__(self, write_config: WriteConfig, connector_config: BaseConnectorConfig): - self.write_config = write_config - self.connector_config = connector_config - - def conform_dict(self, data: dict[str, Any]) -> None: - """ - When the original dictionary needs to be modified in place - """ - return - - def normalize_dict(self, element_dict: dict[str, Any]) -> dict[str, Any]: - """ - When the original dictionary needs to be mapped to a new one - """ - return element_dict - - @abstractmethod - def initialize(self): - """Initializes the connector.
Should also validate the connector is properly - configured.""" - - def write(self, docs: list[BaseSingleIngestDoc]) -> None: - elements_dict = self.get_elements_dict(docs=docs) - self.modify_and_write_dict(elements_dict=elements_dict) - - def get_elements_dict(self, docs: list[BaseSingleIngestDoc]) -> list[dict[str, Any]]: - dict_list: list[dict[str, Any]] = [] - for doc in docs: - local_path = doc._output_filename - with open(local_path) as json_file: - dict_content = json.load(json_file) - logger.info( - f"Extending {len(dict_content)} json elements from content in {local_path}", - ) - dict_list.extend(dict_content) - return dict_list - - @abstractmethod - def write_dict(self, *args, elements_dict: list[dict[str, Any]], **kwargs) -> None: - pass - - def modify_and_write_dict(self, *args, elements_dict: list[dict[str, Any]], **kwargs) -> None: - """ - Modify in this instance means this method wraps calls to conform_dict() and - normalize_dict() before actually processing the content via write_dict() - """ - for d in elements_dict: - self.conform_dict(data=d) - elements_dict_normalized = [self.normalize_dict(element_dict=d) for d in elements_dict] - return self.write_dict(*args, elements_dict=elements_dict_normalized, **kwargs) - - def write_elements(self, elements: list[Element], *args, **kwargs) -> None: - elements_dict = [e.to_dict() for e in elements] - self.modify_and_write_dict(*args, elements_dict=elements_dict, **kwargs) - - -class SourceConnectorCleanupMixin: - read_config: ReadConfig - - def cleanup(self, cur_dir=None): - """Recursively clean up downloaded files and directories.""" - if self.read_config.preserve_downloads or self.read_config.download_only: - return - if cur_dir is None: - cur_dir = self.read_config.download_dir - if cur_dir is None or not Path(cur_dir).is_dir(): - return - sub_dirs = os.listdir(cur_dir) - os.chdir(cur_dir) - for sub_dir in sub_dirs: - # don't traverse symlinks, not that there ever should be any - if os.path.isdir(sub_dir) and not os.path.islink(sub_dir): - self.cleanup(sub_dir) - os.chdir("..") - if len(os.listdir(cur_dir)) == 0: - os.rmdir(cur_dir) - - -class PermissionsCleanupMixin: - processor_config: ProcessorConfig - - def cleanup_permissions(self, cur_dir=None): - """Recursively clean up downloaded files and directories.""" - def has_no_folders(folder_path): - folders = [ - item - for item in os.listdir(folder_path) - if os.path.isdir(os.path.join(folder_path, item)) - ] - return len(folders) == 0 - - if cur_dir is None: - cur_dir = Path(self.processor_config.output_dir, "permissions_data") - if not Path(cur_dir).exists(): - return - if Path(cur_dir).is_file(): - cur_file = cur_dir - os.remove(cur_file) - return - sub_dirs = os.listdir(cur_dir) - os.chdir(cur_dir) - for sub_dir in sub_dirs: - # don't traverse symlinks, not that there ever should be any - if not os.path.islink(sub_dir): - self.cleanup_permissions(sub_dir) - os.chdir("..") - if has_no_folders(cur_dir): - os.rmdir(cur_dir) - - -class IngestDocCleanupMixin: - read_config: ReadConfig - - @property - @abstractmethod - def filename(self): - """The local filename of the document after fetching from remote source.""" - - def cleanup_file(self): - """Removes the local copy of the file after successful processing.""" - if ( - not self.read_config.preserve_downloads - and self.filename.is_file() - and not self.read_config.download_only - ): - logger.debug(f"Cleaning up {self}") - os.unlink(self.filename) - - -class ConfigSessionHandleMixin: - @abstractmethod - def
create_session_handle(self) -> BaseSessionHandle: - """Creates a session handle that will be assigned on each IngestDoc to share - session related resources across all document handling for a given subprocess.""" - - -@dataclass -class IngestDocSessionHandleMixin: - connector_config: ConfigSessionHandleMixin - _session_handle: Optional[BaseSessionHandle] = field(default=None, init=False) - - @property - def session_handle(self): - """If a session handle is not assigned, creates a new one and assigns it.""" - if self._session_handle is None: - self._session_handle = self.connector_config.create_session_handle() - return self._session_handle - - @session_handle.setter - def session_handle(self, session_handle: BaseSessionHandle): - self._session_handle = session_handle diff --git a/unstructured/ingest/logger.py b/unstructured/ingest/logger.py deleted file mode 100644 index ed4e7180e..000000000 --- a/unstructured/ingest/logger.py +++ /dev/null @@ -1,130 +0,0 @@ -import ast -import json -import logging -import typing as t - -logger = logging.getLogger("unstructured.ingest") - - -def default_is_data_sensitive(k: str, v: t.Any) -> bool: - sensitive_fields = [ - "account_name", - "client_id", - ] - sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"] - return ( - v - and any([s in k.lower() for s in sensitive_triggers]) # noqa: C419 - or k.lower() in sensitive_fields - ) - - -def hide_sensitive_fields( - data: dict, is_sensitive_fn: t.Callable[[str, t.Any], bool] = default_is_data_sensitive -) -> dict: - """ - Will recursively look through every k, v pair in this dict and any nested ones and run - is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if - any string value can be parsed as valid json and process that dict as well and replace - the original string with the json.dumps() version of the redacted dict. - """ - new_data = data.copy() - for k, v in new_data.items(): - if is_sensitive_fn(k, v): - new_data[k] = "*******" - if isinstance(v, dict): - new_data[k] = hide_sensitive_fields(v) - if isinstance(v, str): - # Need to take into account strings generated via json.dumps() or simply printing a dict - try: - json_data = json.loads(v) - if isinstance(json_data, dict): - updated_data = hide_sensitive_fields(json_data) - new_data[k] = json.dumps(updated_data) - except json.JSONDecodeError: - pass - - return new_data - - -def redact_jsons(s: str) -> str: - """ - Takes in a generic string and pulls out all valid json content. Leverages - hide_sensitive_fields() to redact any sensitive information and replaces the - original json with the new redacted format. There can be any number of valid - jsons in a generic string and this will work. Having extra '{' without a - closing '}' will cause this to break though. i.e '{ text, {"a": 3}'. 
- - """ - chars = list(s) - if "{" not in chars: - return s - i = 0 - jsons = [] - i = 0 - while i < len(chars): - char = chars[i] - if char == "{": - stack = [char] - current = [char] - while len(stack) != 0 and i < len(chars): - i += 1 - char = chars[i] - current.append(char) - if char == "{": - stack.append(char) - if char == "}": - stack.pop(-1) - jsons.append("".join(current)) - continue - i += 1 - for j in jsons: - try: - formatted_j = json.dumps(json.loads(j)) - except json.JSONDecodeError: - formatted_j = json.dumps(ast.literal_eval(j)) - hidden_j = json.dumps(hide_sensitive_fields(json.loads(formatted_j))) - s = s.replace(j, hidden_j) - return s - - -class SensitiveFormatter(logging.Formatter): - def format(self, record): - s = super().format(record=record) - return redact_jsons(s) - - -def remove_root_handlers(logger: logging.Logger) -> None: - # NOTE(robinson) - in some environments such as Google Colab, there is a root handler - # that doesn't not mask secrets, meaning sensitive info such as api keys appear in logs. - # Removing these when they exist prevents this behavior - if logger.root.hasHandlers(): - for handler in logger.root.handlers: - logger.root.removeHandler(handler) - - -def ingest_log_streaming_init(level: int) -> None: - handler = logging.StreamHandler() - handler.name = "ingest_log_handler" - formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - - # Only want to add the handler once - if "ingest_log_handler" not in [h.name for h in logger.handlers]: - logger.addHandler(handler) - - remove_root_handlers(logger) - logger.setLevel(level) - - -def make_default_logger(level: int) -> logging.Logger: - """Return a custom logger.""" - logger = logging.getLogger("unstructured.ingest") - handler = logging.StreamHandler() - handler.name = "ingest_log_handler" - formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - logger.addHandler(handler) - logger.setLevel(level) - remove_root_handlers(logger) - return logger diff --git a/unstructured/ingest/main.py b/unstructured/ingest/main.py deleted file mode 100755 index ead616f40..000000000 --- a/unstructured/ingest/main.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -from unstructured.ingest.cli.cli import get_cmd - - -def main(): - ingest_cmd = get_cmd() - ingest_cmd() - - -if __name__ == "__main__": - main() diff --git a/unstructured/ingest/pipeline/__init__.py b/unstructured/ingest/pipeline/__init__.py deleted file mode 100644 index 439647b60..000000000 --- a/unstructured/ingest/pipeline/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -from .doc_factory import DocFactory -from .interfaces import PipelineContext, ReformatNode -from .partition import Partitioner -from .permissions import PermissionsDataCleaner -from .pipeline import Pipeline -from .reformat.chunking import Chunker -from .reformat.embedding import Embedder -from .source import Reader -from .write import Writer - -__all__ = [ - "DocFactory", - "Partitioner", - "Reader", - "Embedder", - "PipelineContext", - "Pipeline", - "Writer", - "Chunker", - "ReformatNode", - "PermissionsDataCleaner", -] diff --git a/unstructured/ingest/pipeline/copy.py b/unstructured/ingest/pipeline/copy.py deleted file mode 100644 index 5ec195265..000000000 --- a/unstructured/ingest/pipeline/copy.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import shutil -from pathlib import Path - -from unstructured.ingest.connector.registry import 
create_ingest_doc_from_dict -from unstructured.ingest.logger import logger -from unstructured.ingest.pipeline.interfaces import CopyNode - - -class Copier(CopyNode): - def run(self, json_path: str): - filename = os.path.basename(json_path) - doc_hash = os.path.splitext(filename)[0] - ingest_doc_dict = self.pipeline_context.ingest_docs_map[doc_hash] - ingest_doc = create_ingest_doc_from_dict(ingest_doc_dict) - desired_output = ingest_doc._output_filename - Path(desired_output).parent.mkdir(parents=True, exist_ok=True) - logger.info(f"Copying {json_path} -> {desired_output}") - shutil.copy(json_path, desired_output) diff --git a/unstructured/ingest/pipeline/doc_factory.py b/unstructured/ingest/pipeline/doc_factory.py deleted file mode 100644 index 38feca4e4..000000000 --- a/unstructured/ingest/pipeline/doc_factory.py +++ /dev/null @@ -1,12 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.pipeline.interfaces import DocFactoryNode - - -@dataclass -class DocFactory(DocFactoryNode): - def run(self, *args, **kwargs) -> t.Iterable[dict]: - docs = self.source_doc_connector.get_ingest_docs() - json_docs = [doc.to_dict() for doc in docs] - return json_docs diff --git a/unstructured/ingest/pipeline/interfaces.py b/unstructured/ingest/pipeline/interfaces.py deleted file mode 100644 index 8db9e536c..000000000 --- a/unstructured/ingest/pipeline/interfaces.py +++ /dev/null @@ -1,265 +0,0 @@ -import hashlib -import json -import logging -import multiprocessing as mp -import typing as t -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from multiprocessing.managers import DictProxy -from pathlib import Path - -import backoff -from dataclasses_json import DataClassJsonMixin - -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.ingest_backoff import RetryHandler -from unstructured.ingest.interfaces import ( - BaseDestinationConnector, - BaseSourceConnector, - PartitionConfig, - ProcessorConfig, - ReadConfig, - RetryStrategyConfig, -) -from unstructured.ingest.logger import ingest_log_streaming_init, logger - - -@dataclass -class PipelineContext(ProcessorConfig): - """ - Data that gets shared across each pipeline node - """ - - def __post_init__(self): - self._ingest_docs_map: t.Optional[DictProxy] = None - - @property - def ingest_docs_map(self) -> DictProxy: - if self._ingest_docs_map is None: - raise ValueError("ingest_docs_map never initialized") - return self._ingest_docs_map - - @ingest_docs_map.setter - def ingest_docs_map(self, value: DictProxy): - self._ingest_docs_map = value - - -@dataclass -class PipelineNode(DataClassJsonMixin, ABC): - """ - Class that encapsulates logic to run during a single pipeline step - """ - - pipeline_context: PipelineContext - - def __call__(self, iterable: t.Optional[t.Iterable[t.Any]] = None) -> t.Any: - iterable = iterable if iterable else [] - if iterable: - logger.info( - f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore - ) - - self.initialize() - if not self.supported_multiprocessing(): - if iterable: - self.result = self.run(iterable) - else: - self.result = self.run() - elif self.pipeline_context.num_processes == 1: - if iterable: - self.result = [self.run(it) for it in iterable] - else: - self.result = self.run() - else: - with mp.Pool( - processes=self.pipeline_context.num_processes, - initializer=ingest_log_streaming_init, - initargs=(logging.DEBUG if self.pipeline_context.verbose else logging.INFO,), - ) as pool: - 
self.result = pool.map(self.run, iterable) - # Remove None which may be caused by failed docs that didn't raise an error - if isinstance(self.result, t.Iterable): - self.result = [r for r in self.result if r is not None] - return self.result - - def supported_multiprocessing(self) -> bool: - return True - - @abstractmethod - def run(self, *args, **kwargs) -> t.Optional[t.Any]: - pass - - def initialize(self): - if path := self.get_path(): - logger.info(f"Creating {path}") - path.mkdir(parents=True, exist_ok=True) - ingest_log_streaming_init(logging.DEBUG if self.pipeline_context.verbose else logging.INFO) - - def get_path(self) -> t.Optional[Path]: - return None - - -@dataclass -class DocFactoryNode(PipelineNode): - """ - Encapsulated logic to generate a list of ingest docs - """ - - source_doc_connector: BaseSourceConnector - - def initialize(self): - logger.info( - f"Running doc factory to generate ingest docs. " - f"Source connector: {self.source_doc_connector.to_json()}", - ) - super().initialize() - self.source_doc_connector.initialize() - - @abstractmethod - def run(self, *args, **kwargs) -> t.Iterable[dict]: - pass - - def supported_multiprocessing(self) -> bool: - return False - - -@dataclass -class SourceNode(PipelineNode): - """A pipeline node representing logic to pull data from a source using base ingest documents. - - This class encapsulates the logic for pulling data from a specified source using base ingest - documents. The output of this logic is expected to be in JSON format representing the data - itself. - - Attributes: - read_config: A configuration object specifying how to read data from the source. - retry_strategy_config: Optional configuration specifying the strategy for network errors. - - Properties: - retry_strategy: A retry handler configured based on the retry strategy configuration. - - Methods: - initialize: Initializes the source node and logs the process. - run: Abstract method for downloading data associated with ingest documents. - """ - - read_config: ReadConfig - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - - @property - def retry_strategy(self) -> t.Optional[RetryHandler]: - if retry_strategy_config := self.retry_strategy_config: - return RetryHandler( - backoff.expo, - SourceConnectionNetworkError, - max_time=retry_strategy_config.max_retry_time, - max_tries=retry_strategy_config.max_retries, - logger=logger, - start_log_level=logger.level, - backoff_log_level=logger.level, - ) - return None - - def initialize(self): - logger.info("Running source node to download data associated with ingest docs") - super().initialize() - - @abstractmethod - def run(self, ingest_doc_json: str) -> t.Optional[str]: - pass - - -@dataclass -class PartitionNode(PipelineNode): - """ - Encapsulates logic to run partition on the json files as the output of the source node - """ - - partition_config: PartitionConfig - partition_kwargs: dict = field(default_factory=dict) - - def initialize(self): - logger.info( - f"Running partition node to extract content from json files. 
" - f"Config: {self.partition_config.to_json()}, " - f"partition kwargs: {json.dumps(self.partition_kwargs)}]", - ) - super().initialize() - - def create_hash(self) -> str: - hash_dict = self.partition_config.to_dict() - hash_dict["partition_kwargs"] = self.partition_kwargs - return hashlib.sha256(json.dumps(hash_dict, sort_keys=True).encode()).hexdigest()[:32] - - @abstractmethod - def run(self, json_path: str) -> t.Optional[str]: - pass - - def get_path(self) -> Path: - return (Path(self.pipeline_context.work_dir) / "partitioned").resolve() - - -@dataclass -class ReformatNode(PipelineNode, ABC): - """ - Encapsulated any logic to reformat the output List[Element] - content from partition before writing it - """ - - @abstractmethod - def run(self, elements_json: str) -> t.Optional[str]: - pass - - -@dataclass -class WriteNode(PipelineNode): - """ - Encapsulated logic to write the final result to a downstream data connection - """ - - dest_doc_connector: BaseDestinationConnector - - @abstractmethod - def run(self, json_paths: t.List[str]): - pass - - def initialize(self): - logger.info( - f"Running write node to upload content. " - f"Destination connector: {self.dest_doc_connector.to_json(redact_sensitive=True)}]", - ) - super().initialize() - self.dest_doc_connector.initialize() - - def supported_multiprocessing(self) -> bool: - return False - - -@dataclass -class CopyNode(PipelineNode): - """ - Encapsulated logic to copy the final result of the pipeline to the designated output location. - """ - - def initialize(self): - logger.info("Running copy node to move content to desired output location") - super().initialize() - - @abstractmethod - def run(self, json_path: str): - pass - - -@dataclass -class PermissionsNode(PipelineNode): - """ - Encapsulated logic to do operations on permissions related data. 
- """ - - def initialize(self): - logger.info("Running permissions node to cleanup the permissions folder") - super().initialize() - - @abstractmethod - def run(self): - pass diff --git a/unstructured/ingest/pipeline/partition.py b/unstructured/ingest/pipeline/partition.py deleted file mode 100644 index 4aa2ccc86..000000000 --- a/unstructured/ingest/pipeline/partition.py +++ /dev/null @@ -1,60 +0,0 @@ -import hashlib -import json -import typing as t -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.error import PartitionError -from unstructured.ingest.logger import logger -from unstructured.ingest.pipeline.interfaces import PartitionNode -from unstructured.ingest.pipeline.utils import get_ingest_doc_hash - - -@dataclass -class Partitioner(PartitionNode): - @PartitionError.wrap - def run(self, ingest_doc_dict) -> Optional[str]: - try: - doc = create_ingest_doc_from_dict(ingest_doc_dict) - doc_filename_hash = get_ingest_doc_hash(ingest_doc_dict) - hashed_filename = hashlib.sha256( - f"{self.create_hash()}{doc_filename_hash}".encode(), - ).hexdigest()[:32] - self.pipeline_context.ingest_docs_map[hashed_filename] = ingest_doc_dict - doc_filename = f"{hashed_filename}.json" - json_path = (Path(self.get_path()) / doc_filename).resolve() - if ( - not self.pipeline_context.reprocess - and json_path.is_file() - and json_path.stat().st_size - ): - logger.info(f"File exists: {json_path}, skipping partition") - return str(json_path) - partition_kwargs: t.Dict[str, t.Any] = { - "strategy": self.partition_config.strategy, - "encoding": self.partition_config.encoding, - "pdf_infer_table_structure": self.partition_config.pdf_infer_table_structure, - "languages": self.partition_config.ocr_languages, - "hi_res_model_name": self.partition_config.hi_res_model_name, - } - if self.partition_config.skip_infer_table_types: - partition_kwargs["skip_infer_table_types"] = ( - self.partition_config.skip_infer_table_types - ) - if self.partition_config.additional_partition_args: - partition_kwargs.update(self.partition_config.additional_partition_args) - elements = doc.process_file( - partition_config=self.partition_config, - **partition_kwargs, - ) - with open(json_path, "w", encoding="utf8") as output_f: - logger.info(f"writing partitioned content to {json_path}") - json.dump(elements, output_f, ensure_ascii=False, indent=2, sort_keys=True) - return str(json_path) - except Exception as e: - if self.pipeline_context.raise_on_error: - raise - logger.error(f"failed to partition doc: {ingest_doc_dict}, {e}", exc_info=True) - return None diff --git a/unstructured/ingest/pipeline/permissions.py b/unstructured/ingest/pipeline/permissions.py deleted file mode 100644 index 5a93b3cca..000000000 --- a/unstructured/ingest/pipeline/permissions.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass - -from unstructured.ingest.interfaces import PermissionsCleanupMixin, ProcessorConfig -from unstructured.ingest.pipeline.interfaces import PermissionsNode - - -@dataclass -class PermissionsDataCleaner(PermissionsNode, PermissionsCleanupMixin): - processor_config: ProcessorConfig - - def run(self): - self.cleanup_permissions() diff --git a/unstructured/ingest/pipeline/pipeline.py b/unstructured/ingest/pipeline/pipeline.py deleted file mode 100644 index 6c6897885..000000000 --- a/unstructured/ingest/pipeline/pipeline.py +++ /dev/null @@ -1,117 +0,0 @@ -import logging -import 
multiprocessing as mp -from dataclasses import dataclass, field -from typing import Any, Optional - -from dataclasses_json import DataClassJsonMixin - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.interfaces import BaseIngestDocBatch, BaseSingleIngestDoc -from unstructured.ingest.logger import ingest_log_streaming_init, logger -from unstructured.ingest.pipeline.copy import Copier -from unstructured.ingest.pipeline.interfaces import ( - DocFactoryNode, - PartitionNode, - PipelineContext, - ReformatNode, - SourceNode, - WriteNode, -) -from unstructured.ingest.pipeline.permissions import PermissionsDataCleaner -from unstructured.ingest.pipeline.utils import get_ingest_doc_hash - - -@dataclass -class Pipeline(DataClassJsonMixin): - pipeline_context: PipelineContext - doc_factory_node: DocFactoryNode - source_node: SourceNode - partition_node: Optional[PartitionNode] = None - write_node: Optional[WriteNode] = None - reformat_nodes: "list[ReformatNode]" = field(default_factory=list) - permissions_node: Optional[PermissionsDataCleaner] = None - - def initialize(self): - ingest_log_streaming_init(logging.DEBUG if self.pipeline_context.verbose else logging.INFO) - - def get_nodes_str(self): - nodes = [self.doc_factory_node, self.source_node, self.partition_node] - nodes.extend(self.reformat_nodes) - if self.write_node: - nodes.append(self.write_node) - nodes.append(Copier(pipeline_context=self.pipeline_context)) - return " -> ".join([node.__class__.__name__ for node in nodes]) - - def expand_batch_docs(self, dict_docs: "list[dict[str, Any]]") -> "list[dict[str, Any]]": - expanded_docs: list[dict[str, Any]] = [] - for d in dict_docs: - doc = create_ingest_doc_from_dict(d) - if isinstance(doc, BaseSingleIngestDoc): - expanded_docs.append(doc.to_dict()) - elif isinstance(doc, BaseIngestDocBatch): - expanded_docs.extend([single_doc.to_dict() for single_doc in doc.ingest_docs]) - else: - raise ValueError( - f"type of doc ({type(doc)}) is not a recognized type: " - f"BaseSingleIngestDoc or BaseIngestDocBatch" - ) - return expanded_docs - - def run(self): - logger.info( - f"running pipeline: {self.get_nodes_str()} " - f"with config: {self.pipeline_context.to_json()}", - ) - self.initialize() - manager = mp.Manager() - self.pipeline_context.ingest_docs_map = manager.dict() - # -- Get the documents to be processed -- - dict_docs = self.doc_factory_node() - dict_docs = [manager.dict(d) for d in dict_docs] - if not dict_docs: - logger.info("no docs found to process") - return - logger.info( - f"processing {len(dict_docs)} docs via " - f"{self.pipeline_context.num_processes} processes", - ) - for doc in dict_docs: - self.pipeline_context.ingest_docs_map[get_ingest_doc_hash(doc)] = doc - fetched_filenames = self.source_node(iterable=dict_docs) - if self.source_node.read_config.download_only: - logger.info("stopping pipeline after downloading files") - return - if not fetched_filenames: - logger.info("No files to run partition over") - return - # -- To support batched ingest docs, expand those into the populated single ingest - # -- docs after downloading content - dict_docs = self.expand_batch_docs(dict_docs=dict_docs) - if self.partition_node is None: - raise ValueError("partition node not set") - partitioned_jsons = self.partition_node(iterable=dict_docs) - if not partitioned_jsons: - logger.info("No files to process after partitioning") - return - for reformat_node in self.reformat_nodes: - reformatted_jsons =
reformat_node(iterable=partitioned_jsons) - if not reformatted_jsons: - logger.info(f"No files to process after {reformat_node.__class__.__name__}") - return - partitioned_jsons = reformatted_jsons - - # -- Copy the final destination to the desired location -- - copier = Copier( - pipeline_context=self.pipeline_context, - ) - copier(iterable=partitioned_jsons) - - if self.write_node: - logger.info( - f"uploading elements from {len(partitioned_jsons)} " - "document(s) to the destination" - ) - self.write_node(iterable=partitioned_jsons) - - if self.permissions_node: - self.permissions_node.cleanup_permissions() diff --git a/unstructured/ingest/pipeline/reformat/__init__.py b/unstructured/ingest/pipeline/reformat/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/pipeline/reformat/chunking.py b/unstructured/ingest/pipeline/reformat/chunking.py deleted file mode 100644 index b061cfa1c..000000000 --- a/unstructured/ingest/pipeline/reformat/chunking.py +++ /dev/null @@ -1,129 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -import os.path -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from unstructured.chunking import dispatch -from unstructured.documents.elements import Element, assign_and_map_hash_ids -from unstructured.ingest.interfaces import ChunkingConfig, PartitionConfig -from unstructured.ingest.logger import logger -from unstructured.ingest.pipeline.interfaces import ReformatNode -from unstructured.partition.api import partition_via_api -from unstructured.staging.base import elements_from_json, elements_to_dicts - - -@dataclass -class Chunker(ReformatNode): - """Implementation for the chunking node in the ingest pipeline. - - Parameters - ---------- - pipeline_context: PipelineContext (inherited from parent class) - chunking_config: ChunkingConfig - partition_config: PartitionConfig - """ - - chunking_config: ChunkingConfig - partition_config: PartitionConfig - - def initialize(self): - logger.info( - f"Running chunking node. 
Chunking config: {self.chunking_config.to_json()}]", - ) - super().initialize() - - def create_hash(self) -> str: - hash_dict = self.chunking_config.to_dict() - return hashlib.sha256(json.dumps(hash_dict, sort_keys=True).encode()).hexdigest()[:32] - - def run(self, elements_json: str) -> Optional[str]: - try: - elements_json_filename = os.path.basename(elements_json) - filename_ext = os.path.basename(elements_json_filename) - filename = os.path.splitext(filename_ext)[0] - hashed_filename = hashlib.sha256( - f"{self.create_hash()}{filename}".encode(), - ).hexdigest()[:32] - json_filename = f"{hashed_filename}.json" - json_path = (Path(self.get_path()) / json_filename).resolve() - self.pipeline_context.ingest_docs_map[hashed_filename] = ( - self.pipeline_context.ingest_docs_map[filename] - ) - if ( - not self.pipeline_context.reprocess - and json_path.is_file() - and json_path.stat().st_size - ): - logger.debug(f"File exists: {json_path}, skipping chunking") - return str(json_path) - - chunked_elements = self.chunk(elements_json) - - # -- return if chunking_strategy is None -- - if chunked_elements is None: - logger.info(f"chunking_strategy is None, skipping chunking for {filename_ext}") - return - - assign_and_map_hash_ids(chunked_elements) - - element_dicts = elements_to_dicts(chunked_elements) - with open(json_path, "w", encoding="utf8") as output_f: - logger.info(f"writing chunking content to {json_path}") - json.dump(element_dicts, output_f, ensure_ascii=False, indent=2) - return str(json_path) - - except Exception as e: - if self.pipeline_context.raise_on_error: - raise - logger.error(f"failed to run chunking on file {elements_json}, {e}", exc_info=True) - return None - - def get_path(self) -> Path: - return (Path(self.pipeline_context.work_dir) / "chunked").resolve() - - def chunk(self, elements_json_file: str) -> Optional[list[Element]]: - """Called by Chunker.run() to properly execute the defined chunking_strategy.""" - # -- No chunking_strategy means no chunking -- - if self.chunking_config.chunking_strategy is None: - return - # -- Chunk locally for open-source chunking strategies, even when partitioning remotely -- - if self.chunking_config.chunking_strategy in ("basic", "by_title"): - return dispatch.chunk( - elements=elements_from_json(filename=elements_json_file), - chunking_strategy=self.chunking_config.chunking_strategy, - combine_text_under_n_chars=self.chunking_config.combine_text_under_n_chars, - include_orig_elements=self.chunking_config.include_orig_elements, - max_characters=self.chunking_config.max_characters, - multipage_sections=self.chunking_config.multipage_sections, - new_after_n_chars=self.chunking_config.new_after_n_chars, - overlap=self.chunking_config.overlap, - overlap_all=self.chunking_config.overlap_all, - ) - # -- Chunk remotely -- - if self.partition_config.partition_by_api: - return partition_via_api( - filename=elements_json_file, - # -- (jennings) If api_key or api_url are None, partition_via_api will raise an - # -- error, which will be caught and logged by Chunker.run() - api_key=self.partition_config.api_key, # type: ignore - api_url=self.partition_config.partition_endpoint, # type: ignore - chunking_strategy=self.chunking_config.chunking_strategy, - combine_under_n_chars=self.chunking_config.combine_text_under_n_chars, - include_orig_elements=self.chunking_config.include_orig_elements, - max_characters=self.chunking_config.max_characters, - multipage_sections=self.chunking_config.multipage_sections, - 
new_after_n_chars=self.chunking_config.new_after_n_chars, - overlap=self.chunking_config.overlap, - overlap_all=self.chunking_config.overlap_all, - ) - # -- Warn that the defined chunking_strategy is not locally available -- - logger.warning( - f"There is no locally available chunking_strategy:" - f" {self.chunking_config.chunking_strategy}." - f" If trying to partition remotely, check that `partition_by_api`, `api_url`," - f" and `api_key` are correctly defined." - ) diff --git a/unstructured/ingest/pipeline/reformat/embedding.py b/unstructured/ingest/pipeline/reformat/embedding.py deleted file mode 100644 index 58d47b429..000000000 --- a/unstructured/ingest/pipeline/reformat/embedding.py +++ /dev/null @@ -1,65 +0,0 @@ -import hashlib -import json -import os.path -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from unstructured.ingest.interfaces import ( - EmbeddingConfig, -) -from unstructured.ingest.logger import logger -from unstructured.ingest.pipeline.interfaces import ReformatNode -from unstructured.staging.base import elements_from_json, elements_to_dicts - - -@dataclass -class Embedder(ReformatNode): - embedder_config: EmbeddingConfig - - def initialize(self): - logger.info( - f"Running embedding node. Embedding config: {self.embedder_config.to_json()}]", - ) - super().initialize() - - def create_hash(self) -> str: - hash_dict = self.embedder_config.to_dict() - return hashlib.sha256(json.dumps(hash_dict, sort_keys=True).encode()).hexdigest()[:32] - - def run(self, elements_json: str) -> Optional[str]: - try: - elements_json_filename = os.path.basename(elements_json) - filename_ext = os.path.basename(elements_json_filename) - filename = os.path.splitext(filename_ext)[0] - hashed_filename = hashlib.sha256( - f"{self.create_hash()}{filename}".encode(), - ).hexdigest()[:32] - json_filename = f"{hashed_filename}.json" - json_path = (Path(self.get_path()) / json_filename).resolve() - self.pipeline_context.ingest_docs_map[hashed_filename] = ( - self.pipeline_context.ingest_docs_map[filename] - ) - if ( - not self.pipeline_context.reprocess - and json_path.is_file() - and json_path.stat().st_size - ): - logger.debug(f"File exists: {json_path}, skipping embedding") - return str(json_path) - elements = elements_from_json(filename=elements_json) - embedder = self.embedder_config.get_embedder() - embedded_elements = embedder.embed_documents(elements=elements) - element_dicts = elements_to_dicts(embedded_elements) - with open(json_path, "w", encoding="utf8") as output_f: - logger.info(f"writing embeddings content to {json_path}") - json.dump(element_dicts, output_f, ensure_ascii=False, indent=2) - return str(json_path) - except Exception as e: - if self.pipeline_context.raise_on_error: - raise - logger.error(f"failed to embed content from file {elements_json}, {e}", exc_info=True) - return None - - def get_path(self) -> Path: - return (Path(self.pipeline_context.work_dir) / "embedded").resolve() diff --git a/unstructured/ingest/pipeline/source.py b/unstructured/ingest/pipeline/source.py deleted file mode 100644 index ee1087a07..000000000 --- a/unstructured/ingest/pipeline/source.py +++ /dev/null @@ -1,77 +0,0 @@ -import os -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.interfaces import ( - BaseIngestDocBatch, - BaseSessionHandle, - BaseSingleIngestDoc, - IngestDocSessionHandleMixin, -) -from unstructured.ingest.logger import logger 
-from unstructured.ingest.pipeline.interfaces import SourceNode - -# module-level variable to store session handle -session_handle: t.Optional[BaseSessionHandle] = None - - -@dataclass -class Reader(SourceNode): - def get_single(self, doc: BaseSingleIngestDoc, ingest_doc_dict: dict) -> str: - if ( - not self.read_config.re_download - and doc.filename.is_file() - and doc.filename.stat().st_size - ): - logger.info(f"File exists: {doc.filename}, skipping download") - # Still need to fetch metadata if file exists locally - doc.update_source_metadata() - else: - serialized_doc = doc.to_json(redact_sensitive=True) - logger.debug(f"Fetching {serialized_doc} - PID: {os.getpid()}") - if self.retry_strategy: - self.retry_strategy(doc.get_file) - else: - doc.get_file() - for k, v in doc.to_dict().items(): - ingest_doc_dict[k] = v - return doc.filename - - def get_batch(self, doc_batch: BaseIngestDocBatch, ingest_doc_dict: dict) -> t.List[str]: - if self.retry_strategy: - self.retry_strategy(doc_batch.get_files) - else: - doc_batch.get_files() - for k, v in doc_batch.to_dict().items(): - ingest_doc_dict[k] = v - return [doc.filename for doc in doc_batch.ingest_docs] - - def run(self, ingest_doc_dict: dict) -> t.Optional[t.Union[str, t.List[str]]]: - try: - global session_handle - doc = create_ingest_doc_from_dict(ingest_doc_dict) - if isinstance(doc, IngestDocSessionHandleMixin): - if session_handle is None: - # create via doc.session_handle, which is a property that creates a - # session handle if one is not already defined - session_handle = doc.session_handle - else: - doc._session_handle = session_handle - if isinstance(doc, BaseSingleIngestDoc): - return self.get_single(doc=doc, ingest_doc_dict=ingest_doc_dict) - elif isinstance(doc, BaseIngestDocBatch): - return self.get_batch(doc_batch=doc, ingest_doc_dict=ingest_doc_dict) - else: - raise ValueError( - f"type of doc ({type(doc)}) is not a recognized type: " - f"BaseSingleIngestDoc or BaseIngestDocBatch" - ) - except Exception as e: - if self.pipeline_context.raise_on_error: - raise - logger.error( - f"failed to get data associated with source doc: {ingest_doc_dict}, {e}", - exc_info=True, - ) - return None diff --git a/unstructured/ingest/pipeline/utils.py b/unstructured/ingest/pipeline/utils.py deleted file mode 100644 index bcd6aa2ab..000000000 --- a/unstructured/ingest/pipeline/utils.py +++ /dev/null @@ -1,6 +0,0 @@ -import hashlib - - -def get_ingest_doc_hash(json_as_dict: dict) -> str: - hashed = hashlib.sha256(json_as_dict["unique_id"].encode()).hexdigest()[:32] - return hashed diff --git a/unstructured/ingest/pipeline/write.py b/unstructured/ingest/pipeline/write.py deleted file mode 100644 index 7a0540983..000000000 --- a/unstructured/ingest/pipeline/write.py +++ /dev/null @@ -1,18 +0,0 @@ -import os.path -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.connector.registry import create_ingest_doc_from_dict -from unstructured.ingest.pipeline.interfaces import WriteNode - - -@dataclass -class Writer(WriteNode): - def run(self, json_paths: t.List[str]): - ingest_docs = [] - for json_path in json_paths: - filename = os.path.basename(json_path) - doc_hash = os.path.splitext(filename)[0] - ingest_doc_dict = self.pipeline_context.ingest_docs_map[doc_hash] - ingest_docs.append(create_ingest_doc_from_dict(ingest_doc_dict)) - self.dest_doc_connector.write(docs=ingest_docs) diff --git a/unstructured/ingest/processor.py b/unstructured/ingest/processor.py deleted file mode 100644 index cf4c775cd..000000000 ---
a/unstructured/ingest/processor.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import annotations - -import multiprocessing as mp -from contextlib import suppress -from typing import Optional - -from unstructured.ingest.interfaces import ( - BaseDestinationConnector, - BaseSourceConnector, - ChunkingConfig, - EmbeddingConfig, - PartitionConfig, - PermissionsConfig, - ProcessorConfig, - RetryStrategyConfig, -) -from unstructured.ingest.pipeline import ( - Chunker, - DocFactory, - Embedder, - Partitioner, - PermissionsDataCleaner, - Pipeline, - PipelineContext, - Reader, - ReformatNode, - Writer, -) - -with suppress(RuntimeError): - mp.set_start_method("spawn") - - -def process_documents( - processor_config: ProcessorConfig, - source_doc_connector: BaseSourceConnector, - partition_config: PartitionConfig, - dest_doc_connector: Optional[BaseDestinationConnector] = None, - chunking_config: Optional[ChunkingConfig] = None, - embedder_config: Optional[EmbeddingConfig] = None, - permissions_config: Optional[PermissionsConfig] = None, - retry_strategy_config: Optional[RetryStrategyConfig] = None, -) -> None: - pipeline_config = PipelineContext.from_dict(processor_config.to_dict()) - doc_factory = DocFactory( - pipeline_context=pipeline_config, - source_doc_connector=source_doc_connector, - ) - reader = Reader( - pipeline_context=pipeline_config, - retry_strategy_config=retry_strategy_config, - read_config=source_doc_connector.read_config, - ) - partitioner = Partitioner(pipeline_context=pipeline_config, partition_config=partition_config) - reformat_nodes: list[ReformatNode] = [] - if chunking_config: - reformat_nodes.append( - Chunker( - pipeline_context=pipeline_config, - chunking_config=chunking_config, - partition_config=partition_config, - ), - ) - if embedder_config: - reformat_nodes.append( - Embedder( - pipeline_context=pipeline_config, - embedder_config=embedder_config, - ), - ) - writer = ( - Writer( - pipeline_context=pipeline_config, - dest_doc_connector=dest_doc_connector, - ) - if dest_doc_connector - else None - ) - permissions_data_cleaner = ( - PermissionsDataCleaner(pipeline_context=pipeline_config, processor_config=processor_config) - if permissions_config - else None - ) - pipeline = Pipeline( - pipeline_context=pipeline_config, - doc_factory_node=doc_factory, - source_node=reader, - partition_node=partitioner, - reformat_nodes=reformat_nodes, - write_node=writer, - permissions_node=permissions_data_cleaner, - ) - pipeline.run() diff --git a/unstructured/ingest/runner/__init__.py b/unstructured/ingest/runner/__init__.py deleted file mode 100644 index 872ebb10d..000000000 --- a/unstructured/ingest/runner/__init__.py +++ /dev/null @@ -1,104 +0,0 @@ -import typing as t -from typing import Type - -from .airtable import AirtableRunner -from .astradb import AstraDBRunner -from .base_runner import Runner -from .biomed import BiomedRunner -from .confluence import ConfluenceRunner -from .delta_table import DeltaTableRunner -from .discord import DiscordRunner -from .elasticsearch import ElasticSearchRunner -from .fsspec.azure import AzureRunner -from .fsspec.box import BoxRunner -from .fsspec.dropbox import DropboxRunner -from .fsspec.fsspec import FsspecRunner -from .fsspec.gcs import GCSRunner -from .fsspec.s3 import S3Runner -from .fsspec.sftp import SftpRunner -from .github import GithubRunner -from .gitlab import GitlabRunner -from .google_drive import GoogleDriveRunner -from .hubspot import HubSpotRunner -from .jira import JiraRunner -from .kafka import KafkaRunner -from .local 
import LocalRunner -from .mongodb import MongoDBRunner -from .notion import NotionRunner -from .onedrive import OneDriveRunner -from .opensearch import OpenSearchRunner -from .outlook import OutlookRunner -from .reddit import RedditRunner -from .salesforce import SalesforceRunner -from .sharepoint import SharePointRunner -from .slack import SlackRunner -from .wikipedia import WikipediaRunner - -runner_map: t.Dict[str, Type[Runner]] = { - "airtable": AirtableRunner, - "astradb": AstraDBRunner, - "azure": AzureRunner, - "biomed": BiomedRunner, - "box": BoxRunner, - "confluence": ConfluenceRunner, - "delta_table": DeltaTableRunner, - "discord": DiscordRunner, - "dropbox": DropboxRunner, - "elasticsearch": ElasticSearchRunner, - "fsspec": FsspecRunner, - "gcs": GCSRunner, - "github": GithubRunner, - "gitlab": GitlabRunner, - "gdrive": GoogleDriveRunner, - "google_drive": GoogleDriveRunner, - "hubspot": HubSpotRunner, - "jira": JiraRunner, - "kafka": KafkaRunner, - "local": LocalRunner, - "mongodb": MongoDBRunner, - "notion": NotionRunner, - "onedrive": OneDriveRunner, - "opensearch": OpenSearchRunner, - "outlook": OutlookRunner, - "reddit": RedditRunner, - "s3": S3Runner, - "salesforce": SalesforceRunner, - "sftp": SftpRunner, - "sharepoint": SharePointRunner, - "slack": SlackRunner, - "wikipedia": WikipediaRunner, -} - -__all__ = [ - "AirtableRunner", - "AstraDBRunner", - "AzureRunner", - "BiomedRunner", - "BoxRunner", - "ConfluenceRunner", - "DeltaTableRunner", - "DiscordRunner", - "DropboxRunner", - "ElasticSearchRunner", - "FsspecRunner", - "GCSRunner", - "GoogleDriveRunner", - "GithubRunner", - "GitlabRunner", - "JiraRunner", - "KafkaRunner", - "LocalRunner", - "MongoDBRunner", - "NotionRunner", - "OneDriveRunner", - "OpenSearchRunner", - "OutlookRunner", - "RedditRunner", - "S3Runner", - "SalesforceRunner", - "SharePointRunner", - "SlackRunner", - "WikipediaRunner", - "runner_map", - "Runner", -] diff --git a/unstructured/ingest/runner/airtable.py b/unstructured/ingest/runner/airtable.py deleted file mode 100644 index ec148221c..000000000 --- a/unstructured/ingest/runner/airtable.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.airtable import SimpleAirtableConfig - - -@dataclass -class AirtableRunner(Runner): - connector_config: "SimpleAirtableConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.access_config.personal_access_token.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="airtable", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.airtable import ( - AirtableSourceConnector, - ) - - return AirtableSourceConnector diff --git a/unstructured/ingest/runner/astradb.py b/unstructured/ingest/runner/astradb.py deleted file mode 100644 index a07c66b93..000000000 --- a/unstructured/ingest/runner/astradb.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from
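The runner_map above is a plain registry dict, so connector dispatch reduces to a dictionary lookup plus instantiation. A hedged sketch of how a CLI layer might resolve a runner by name; Runner and S3Runner here are toy stand-ins for the classes being removed, not the real interfaces.

from typing import Dict, Type

class Runner:  # stand-in for the removed base class
    def __init__(self, **configs):
        self.configs = configs

    def run(self):
        raise NotImplementedError

class S3Runner(Runner):
    def run(self):
        print("running s3 ingest with", self.configs)

runner_map: Dict[str, Type[Runner]] = {"s3": S3Runner}

def get_runner(name: str, **configs) -> Runner:
    # Unknown names fail fast with the list of valid options.
    try:
        runner_cls = runner_map[name]
    except KeyError:
        raise ValueError(f"unknown connector: {name!r}; options: {sorted(runner_map)}")
    return runner_cls(**configs)

get_runner("s3", remote_url="s3://bucket/path").run()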
unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.astradb import SimpleAstraDBConfig - - -@dataclass -class AstraDBRunner(Runner): - connector_config: "SimpleAstraDBConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.access_config.api_endpoint).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="astradb", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.astradb import ( - AstraDBSourceConnector, - ) - - return AstraDBSourceConnector diff --git a/unstructured/ingest/runner/base_runner.py b/unstructured/ingest/runner/base_runner.py deleted file mode 100644 index dbc9c58d1..000000000 --- a/unstructured/ingest/runner/base_runner.py +++ /dev/null @@ -1,89 +0,0 @@ -import logging -import typing as t -from abc import ABC, abstractmethod -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseDestinationConnector, - BaseSourceConnector, - ChunkingConfig, - EmbeddingConfig, - PartitionConfig, - PermissionsConfig, - ProcessorConfig, - ReadConfig, - RetryStrategyConfig, -) -from unstructured.ingest.logger import ingest_log_streaming_init -from unstructured.ingest.processor import process_documents -from unstructured.ingest.runner.writers.base_writer import Writer - - -@dataclass -class Runner(EnhancedDataClassJsonMixin, ABC): - connector_config: BaseConnectorConfig - processor_config: ProcessorConfig - read_config: ReadConfig - partition_config: PartitionConfig - writer: t.Optional[Writer] = None - writer_kwargs: t.Optional[dict] = None - embedding_config: t.Optional[EmbeddingConfig] = None - chunking_config: t.Optional[ChunkingConfig] = None - permissions_config: t.Optional[PermissionsConfig] = None - retry_strategy_config: t.Optional[RetryStrategyConfig] = None - - def run(self, *args, **kwargs): - ingest_log_streaming_init(logging.DEBUG if self.processor_config.verbose else logging.INFO) - self.update_read_config() - source_connector = self.get_source_connector() - self.process_documents( - source_doc_connector=source_connector, - ) - - @abstractmethod - def update_read_config(self): - pass - - @abstractmethod - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - pass - - def get_source_connector(self) -> BaseSourceConnector: - source_connector_cls = self.get_source_connector_cls() - return source_connector_cls( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - ) - - def get_dest_doc_connector(self) -> t.Optional[BaseDestinationConnector]: - writer_kwargs = self.writer_kwargs if self.writer_kwargs else {} - if self.writer: - return self.writer.get_connector(**writer_kwargs) - return None - - def get_permissions_config(self) -> t.Optional[PermissionsConfig]: - if self.permissions_config is None: - return None - - permissions_config_filled = bool( - self.permissions_config.application_id - and self.permissions_config.client_cred - and self.permissions_config.tenant, - ) - - return self.permissions_config if permissions_config_filled else None - - def 
process_documents(self, source_doc_connector: BaseSourceConnector): - process_documents( - processor_config=self.processor_config, - source_doc_connector=source_doc_connector, - partition_config=self.partition_config, - dest_doc_connector=self.get_dest_doc_connector(), - embedder_config=self.embedding_config, - chunking_config=self.chunking_config, - permissions_config=self.get_permissions_config(), - retry_strategy_config=self.retry_strategy_config, - ) diff --git a/unstructured/ingest/runner/biomed.py b/unstructured/ingest/runner/biomed.py deleted file mode 100644 index 045d4486c..000000000 --- a/unstructured/ingest/runner/biomed.py +++ /dev/null @@ -1,45 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.biomed import SimpleBiomedConfig - - -@dataclass -class BiomedRunner(Runner): - connector_config: "SimpleBiomedConfig" - - def update_read_config(self): - base_path = ( - self.connector_config.path - if self.connector_config.path - else "{}-{}-{}".format( - self.connector_config.api_id if self.connector_config.api_id else "", - self.connector_config.api_from if self.connector_config.api_from else "", - self.connector_config.api_until if self.connector_config.api_until else "", - ) - ) - - hashed_dir_name = hashlib.sha256( - base_path.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="biomed", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.biomed import ( - BiomedSourceConnector, - ) - - return BiomedSourceConnector diff --git a/unstructured/ingest/runner/confluence.py b/unstructured/ingest/runner/confluence.py deleted file mode 100644 index 3f6057512..000000000 --- a/unstructured/ingest/runner/confluence.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.confluence import SimpleConfluenceConfig - - -@dataclass -class ConfluenceRunner(Runner): - connector_config: "SimpleConfluenceConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.url.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="confluence", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.confluence import ( - ConfluenceSourceConnector, - ) - - return ConfluenceSourceConnector diff --git a/unstructured/ingest/runner/delta_table.py b/unstructured/ingest/runner/delta_table.py deleted file mode 100644 index 5dc418710..000000000 --- a/unstructured/ingest/runner/delta_table.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import 
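Each concrete runner in this package overrides exactly two hooks on the base class above, update_read_config() and get_source_connector_cls(); everything else rides on Runner.run(). A toy sketch of that template-method shape under stand-in types (ReadConfig and the /tmp path here are simplifications, not the real interfaces):

import hashlib
from abc import ABC, abstractmethod
from dataclasses import dataclass, field

@dataclass
class ReadConfig:  # stand-in for unstructured.ingest.interfaces.ReadConfig
    download_dir: str = ""

@dataclass
class BaseRunner(ABC):
    read_config: ReadConfig = field(default_factory=ReadConfig)

    def run(self):
        # Template method: fix up the read config, then hand off to the connector.
        self.update_read_config()
        print(f"downloading into {self.read_config.download_dir}")

    @abstractmethod
    def update_read_config(self): ...

@dataclass
class WikipediaLikeRunner(BaseRunner):
    page_title: str = "Open source"

    def update_read_config(self):
        # Mirror the hash-derived download dir used by the concrete runners.
        digest = hashlib.sha256(self.page_title.encode("utf-8")).hexdigest()[:10]
        self.read_config.download_dir = f"/tmp/ingest/wikipedia/{digest}"

WikipediaLikeRunner().run()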
BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.delta_table import SimpleDeltaTableConfig - - -@dataclass -class DeltaTableRunner(Runner): - connector_config: "SimpleDeltaTableConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.table_uri).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="delta_table", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.delta_table import ( - DeltaTableSourceConnector, - ) - - return DeltaTableSourceConnector diff --git a/unstructured/ingest/runner/discord.py b/unstructured/ingest/runner/discord.py deleted file mode 100644 index 28f11a9be..000000000 --- a/unstructured/ingest/runner/discord.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.discord import SimpleDiscordConfig - - -@dataclass -class DiscordRunner(Runner): - connector_config: "SimpleDiscordConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.channels).encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="discord", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.discord import ( - DiscordSourceConnector, - ) - - return DiscordSourceConnector diff --git a/unstructured/ingest/runner/elasticsearch.py b/unstructured/ingest/runner/elasticsearch.py deleted file mode 100644 index a1cb75b84..000000000 --- a/unstructured/ingest/runner/elasticsearch.py +++ /dev/null @@ -1,40 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.elasticsearch import SimpleElasticsearchConfig - - -@dataclass -class ElasticSearchRunner(Runner): - connector_config: "SimpleElasticsearchConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - "{}_{}".format( - ",".join(self.connector_config.access_config.hosts), - self.connector_config.index_name, - ).encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="elasticsearch", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchSourceConnector, - ) - - return ElasticsearchSourceConnector diff --git a/unstructured/ingest/runner/fsspec/__init__.py 
b/unstructured/ingest/runner/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/runner/fsspec/azure.py b/unstructured/ingest/runner/fsspec/azure.py deleted file mode 100644 index e92f4502f..000000000 --- a/unstructured/ingest/runner/fsspec/azure.py +++ /dev/null @@ -1,30 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.azure import SimpleAzureBlobStorageConfig - - -@dataclass -class AzureRunner(Runner): - connector_config: "SimpleAzureBlobStorageConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="azure", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.azure import ( - AzureBlobStorageSourceConnector, - ) - - return AzureBlobStorageSourceConnector diff --git a/unstructured/ingest/runner/fsspec/box.py b/unstructured/ingest/runner/fsspec/box.py deleted file mode 100644 index c219576f5..000000000 --- a/unstructured/ingest/runner/fsspec/box.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.box import SimpleBoxConfig - - -@dataclass -class BoxRunner(Runner): - connector_config: "SimpleBoxConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="box", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.box import BoxSourceConnector - - return BoxSourceConnector diff --git a/unstructured/ingest/runner/fsspec/dropbox.py b/unstructured/ingest/runner/fsspec/dropbox.py deleted file mode 100644 index ef408918c..000000000 --- a/unstructured/ingest/runner/fsspec/dropbox.py +++ /dev/null @@ -1,30 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.dropbox import SimpleDropboxConfig - - -@dataclass -class DropboxRunner(Runner): - connector_config: "SimpleDropboxConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="dropbox", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxSourceConnector, - ) - - 
return DropboxSourceConnector diff --git a/unstructured/ingest/runner/fsspec/fsspec.py b/unstructured/ingest/runner/fsspec/fsspec.py deleted file mode 100644 index e98251a81..000000000 --- a/unstructured/ingest/runner/fsspec/fsspec.py +++ /dev/null @@ -1,40 +0,0 @@ -import typing as t -import warnings -from dataclasses import dataclass -from urllib.parse import urlparse - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.fsspec import SimpleFsspecConfig - - -@dataclass -class FsspecRunner(Runner): - connector_config: "SimpleFsspecConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="fsspec", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - protocol = urlparse(self.connector_config.remote_url).scheme # type: ignore - warnings.warn( - f"`fsspec` protocol {protocol} is not directly supported by `unstructured`," - " so use it at your own risk. Supported protocols are `gcs`, `gs`, `s3`, `s3a`," - " `dropbox`, `abfs`, `az` and `sftp`.", - UserWarning, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.fsspec import ( - FsspecSourceConnector, - ) - - return FsspecSourceConnector diff --git a/unstructured/ingest/runner/fsspec/gcs.py b/unstructured/ingest/runner/fsspec/gcs.py deleted file mode 100644 index 1c3e043e3..000000000 --- a/unstructured/ingest/runner/fsspec/gcs.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.gcs import SimpleGcsConfig - - -@dataclass -class GCSRunner(Runner): - connector_config: "SimpleGcsConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="gcs", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.gcs import GcsSourceConnector - - return GcsSourceConnector diff --git a/unstructured/ingest/runner/fsspec/s3.py b/unstructured/ingest/runner/fsspec/s3.py deleted file mode 100644 index 086e2a58d..000000000 --- a/unstructured/ingest/runner/fsspec/s3.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.s3 import SimpleS3Config - - -@dataclass -class S3Runner(Runner): - connector_config: "SimpleS3Config" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="s3", - read_config=self.read_config, -
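The generic FsspecRunner above only warns when it sees an unrecognized protocol; a stricter variant could validate the URL scheme up front. A small sketch of such a check, with the supported-protocol list copied from the warning text above (the helper name check_protocol is hypothetical):

from urllib.parse import urlparse

SUPPORTED_PROTOCOLS = {"gcs", "gs", "s3", "s3a", "dropbox", "abfs", "az", "sftp"}

def check_protocol(remote_url: str) -> str:
    # urlparse("s3://bucket/key").scheme == "s3"
    protocol = urlparse(remote_url).scheme
    if protocol not in SUPPORTED_PROTOCOLS:
        raise ValueError(
            f"fsspec protocol {protocol!r} is not directly supported; "
            f"expected one of {sorted(SUPPORTED_PROTOCOLS)}"
        )
    return protocol

print(check_protocol("s3://my-bucket/docs"))  # -> "s3"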
remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.s3 import S3SourceConnector - - return S3SourceConnector diff --git a/unstructured/ingest/runner/fsspec/sftp.py b/unstructured/ingest/runner/fsspec/sftp.py deleted file mode 100644 index db73ad7e1..000000000 --- a/unstructured/ingest/runner/fsspec/sftp.py +++ /dev/null @@ -1,28 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_remote_url - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.sftp import SimpleSftpConfig - - -@dataclass -class SftpRunner(Runner): - connector_config: "SimpleSftpConfig" - - def update_read_config(self): - self.read_config.download_dir = update_download_dir_remote_url( - connector_name="sftp", - read_config=self.read_config, - remote_url=self.connector_config.remote_url, # type: ignore - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.fsspec.sftp import SftpSourceConnector - - return SftpSourceConnector diff --git a/unstructured/ingest/runner/github.py b/unstructured/ingest/runner/github.py deleted file mode 100644 index 86cf191be..000000000 --- a/unstructured/ingest/runner/github.py +++ /dev/null @@ -1,37 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.github import SimpleGitHubConfig - - -@dataclass -class GithubRunner(Runner): - connector_config: "SimpleGitHubConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.url}_{self.connector_config.branch}".encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="github", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.github import ( - GitHubSourceConnector, - ) - - return GitHubSourceConnector diff --git a/unstructured/ingest/runner/gitlab.py b/unstructured/ingest/runner/gitlab.py deleted file mode 100644 index c6b8e5c3a..000000000 --- a/unstructured/ingest/runner/gitlab.py +++ /dev/null @@ -1,37 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.gitlab import SimpleGitlabConfig - - -@dataclass -class GitlabRunner(Runner): - connector_config: "SimpleGitlabConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.url}_{self.connector_config.branch}".encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - 
connector_name="gitlab", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.gitlab import ( - GitLabSourceConnector, - ) - - return GitLabSourceConnector diff --git a/unstructured/ingest/runner/google_drive.py b/unstructured/ingest/runner/google_drive.py deleted file mode 100644 index 8972c9a15..000000000 --- a/unstructured/ingest/runner/google_drive.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.google_drive import SimpleGoogleDriveConfig - - -@dataclass -class GoogleDriveRunner(Runner): - connector_config: "SimpleGoogleDriveConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.drive_id.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="google_drive", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.google_drive import ( - GoogleDriveSourceConnector, - ) - - return GoogleDriveSourceConnector diff --git a/unstructured/ingest/runner/hubspot.py b/unstructured/ingest/runner/hubspot.py deleted file mode 100644 index 2e988e759..000000000 --- a/unstructured/ingest/runner/hubspot.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.hubspot import SimpleHubSpotConfig - - -@dataclass -class HubSpotRunner(Runner): - connector_config: "SimpleHubSpotConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.access_config.api_token.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="hubspot", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.hubspot import ( - HubSpotSourceConnector, - ) - - return HubSpotSourceConnector diff --git a/unstructured/ingest/runner/jira.py b/unstructured/ingest/runner/jira.py deleted file mode 100644 index d632de9d8..000000000 --- a/unstructured/ingest/runner/jira.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.jira import SimpleJiraConfig - - -@dataclass -class JiraRunner(Runner): - connector_config: "SimpleJiraConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - 
self.connector_config.url.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="jira", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.jira import ( - JiraSourceConnector, - ) - - return JiraSourceConnector diff --git a/unstructured/ingest/runner/kafka.py b/unstructured/ingest/runner/kafka.py deleted file mode 100644 index ba8a75094..000000000 --- a/unstructured/ingest/runner/kafka.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.kafka import SimpleKafkaConfig - - -@dataclass -class KafkaRunner(Runner): - connector_config: "SimpleKafkaConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.bootstrap_server).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="kafka", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.kafka import ( - KafkaSourceConnector, - ) - - return KafkaSourceConnector diff --git a/unstructured/ingest/runner/local.py b/unstructured/ingest/runner/local.py deleted file mode 100644 index a8c4ab19c..000000000 --- a/unstructured/ingest/runner/local.py +++ /dev/null @@ -1,23 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.runner.base_runner import Runner - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.local import SimpleLocalConfig - - -@dataclass -class LocalRunner(Runner): - connector_config: "SimpleLocalConfig" - - def update_read_config(self): - pass - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.local import ( - LocalSourceConnector, - ) - - return LocalSourceConnector diff --git a/unstructured/ingest/runner/mongodb.py b/unstructured/ingest/runner/mongodb.py deleted file mode 100644 index bdde249cd..000000000 --- a/unstructured/ingest/runner/mongodb.py +++ /dev/null @@ -1,34 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.mongodb import SimpleMongoDBConfig - - -@dataclass -class MongoDBRunner(Runner): - connector_config: "SimpleMongoDBConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - str(self.connector_config.access_config.uri).encode("utf-8"), - ) - self.read_config.download_dir = update_download_dir_hash( - connector_name="mongodb", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.mongodb import ( - 
MongoDBSourceConnector, - ) - - return MongoDBSourceConnector diff --git a/unstructured/ingest/runner/notion.py b/unstructured/ingest/runner/notion.py deleted file mode 100644 index ee7fd9c5e..000000000 --- a/unstructured/ingest/runner/notion.py +++ /dev/null @@ -1,61 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.notion.connector import SimpleNotionConfig - - -@dataclass -class NotionRunner(Runner): - connector_config: "SimpleNotionConfig" - - def update_read_config(self): - if not self.connector_config.page_ids and not self.connector_config.database_ids: - raise ValueError("no page ids nor database ids provided") - - if self.connector_config.page_ids and self.connector_config.database_ids: - hashed_dir_name = hashlib.sha256( - "{},{}".format( - ",".join(self.connector_config.page_ids), - ",".join(self.connector_config.database_ids), - ).encode("utf-8"), - ) - elif self.connector_config.page_ids: - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.page_ids).encode("utf-8"), - ) - elif self.connector_config.database_ids: - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.database_ids).encode("utf-8"), - ) - else: - raise ValueError("could not create local cache directory name") - - self.read_config.download_dir = update_download_dir_hash( - connector_name="notion", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.notion.connector import ( - NotionSourceConnector, - ) - - return NotionSourceConnector - - def get_source_connector(self) -> BaseSourceConnector: - source_connector_cls = self.get_source_connector_cls() - return source_connector_cls( - processor_config=self.processor_config, - connector_config=self.connector_config, - read_config=self.read_config, - retry_strategy_config=self.retry_strategy_config, - ) diff --git a/unstructured/ingest/runner/onedrive.py b/unstructured/ingest/runner/onedrive.py deleted file mode 100644 index 6c2312614..000000000 --- a/unstructured/ingest/runner/onedrive.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.onedrive import SimpleOneDriveConfig - - -@dataclass -class OneDriveRunner(Runner): - connector_config: "SimpleOneDriveConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.tenant}_{self.connector_config.user_pname}".encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="onedrive", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.onedrive import ( - OneDriveSourceConnector, - ) - - return OneDriveSourceConnector diff --git 
a/unstructured/ingest/runner/opensearch.py b/unstructured/ingest/runner/opensearch.py deleted file mode 100644 index e3ce03a71..000000000 --- a/unstructured/ingest/runner/opensearch.py +++ /dev/null @@ -1,40 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.opensearch import SimpleOpenSearchConfig - - -@dataclass -class OpenSearchRunner(Runner): - connector_config: "SimpleOpenSearchConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - "{}_{}".format( - ",".join(self.connector_config.access_config.hosts), - self.connector_config.index_name, - ).encode( - "utf-8", - ), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="opensearch", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.opensearch import ( - OpenSearchSourceConnector, - ) - - return OpenSearchSourceConnector diff --git a/unstructured/ingest/runner/outlook.py b/unstructured/ingest/runner/outlook.py deleted file mode 100644 index 3672dacef..000000000 --- a/unstructured/ingest/runner/outlook.py +++ /dev/null @@ -1,33 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.outlook import SimpleOutlookConfig - - -@dataclass -class OutlookRunner(Runner): - connector_config: "SimpleOutlookConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256(self.connector_config.user_email.encode("utf-8")) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="outlook", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.outlook import ( - OutlookSourceConnector, - ) - - return OutlookSourceConnector diff --git a/unstructured/ingest/runner/reddit.py b/unstructured/ingest/runner/reddit.py deleted file mode 100644 index 0d59acd74..000000000 --- a/unstructured/ingest/runner/reddit.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.reddit import SimpleRedditConfig - - -@dataclass -class RedditRunner(Runner): - connector_config: "SimpleRedditConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.subreddit_name.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="reddit", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def 
get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.reddit import ( - RedditSourceConnector, - ) - - return RedditSourceConnector diff --git a/unstructured/ingest/runner/salesforce.py b/unstructured/ingest/runner/salesforce.py deleted file mode 100644 index 06326e556..000000000 --- a/unstructured/ingest/runner/salesforce.py +++ /dev/null @@ -1,33 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.salesforce import SimpleSalesforceConfig - - -@dataclass -class SalesforceRunner(Runner): - connector_config: "SimpleSalesforceConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256(self.connector_config.username.encode("utf-8")) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="salesforce", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.salesforce import ( - SalesforceSourceConnector, - ) - - return SalesforceSourceConnector diff --git a/unstructured/ingest/runner/sharepoint.py b/unstructured/ingest/runner/sharepoint.py deleted file mode 100644 index f5e0dd36b..000000000 --- a/unstructured/ingest/runner/sharepoint.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.sharepoint import SimpleSharepointConfig - - -@dataclass -class SharePointRunner(Runner): - connector_config: "SimpleSharepointConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - f"{self.connector_config.site}_{self.connector_config.path}".encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="sharepoint", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.sharepoint import ( - SharepointSourceConnector, - ) - - return SharepointSourceConnector diff --git a/unstructured/ingest/runner/slack.py b/unstructured/ingest/runner/slack.py deleted file mode 100644 index 2d4231473..000000000 --- a/unstructured/ingest/runner/slack.py +++ /dev/null @@ -1,33 +0,0 @@ -import hashlib -import typing as t - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.slack import SimpleSlackConfig - - -class SlackRunner(Runner): - connector_config: "SimpleSlackConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - ",".join(self.connector_config.channels).encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="slack", 
- read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.slack import ( - SlackSourceConnector, - ) - - return SlackSourceConnector diff --git a/unstructured/ingest/runner/utils.py b/unstructured/ingest/runner/utils.py deleted file mode 100644 index 0816923ed..000000000 --- a/unstructured/ingest/runner/utils.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -import hashlib -import logging -from pathlib import Path - -from unstructured.ingest.interfaces import ( - ReadConfig, -) - - -def update_download_dir_remote_url( - connector_name: str, - read_config: ReadConfig, - remote_url: str, - logger: logging.Logger, -) -> str: - hashed_dir_name = hashlib.sha256(remote_url.encode("utf-8")) - return update_download_dir_hash( - connector_name=connector_name, - read_config=read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - -def update_download_dir_hash( - connector_name: str, - read_config: ReadConfig, - hashed_dir_name: hashlib._Hash, - logger: logging.Logger, -) -> str: - if not read_config.download_dir: - cache_path = Path.home() / ".cache" / "unstructured" / "ingest" - if not cache_path.exists(): - cache_path.mkdir(parents=True, exist_ok=True) - download_dir = cache_path / connector_name / hashed_dir_name.hexdigest()[:10] - if read_config.preserve_downloads: - logger.warning( - f"Preserving downloaded files but download_dir is not specified," - f" using {download_dir}", - ) - new_download_dir = str(download_dir) - logger.debug(f"updating download directory to: {new_download_dir}") - else: - new_download_dir = read_config.download_dir - return new_download_dir diff --git a/unstructured/ingest/runner/wikipedia.py b/unstructured/ingest/runner/wikipedia.py deleted file mode 100644 index 7a67dcd43..000000000 --- a/unstructured/ingest/runner/wikipedia.py +++ /dev/null @@ -1,35 +0,0 @@ -import hashlib -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseSourceConnector -from unstructured.ingest.logger import logger -from unstructured.ingest.runner.base_runner import Runner -from unstructured.ingest.runner.utils import update_download_dir_hash - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.wikipedia import SimpleWikipediaConfig - - -@dataclass -class WikipediaRunner(Runner): - connector_config: "SimpleWikipediaConfig" - - def update_read_config(self): - hashed_dir_name = hashlib.sha256( - self.connector_config.page_title.encode("utf-8"), - ) - - self.read_config.download_dir = update_download_dir_hash( - connector_name="wikipedia", - read_config=self.read_config, - hashed_dir_name=hashed_dir_name, - logger=logger, - ) - - def get_source_connector_cls(self) -> t.Type[BaseSourceConnector]: - from unstructured.ingest.connector.wikipedia import ( - WikipediaSourceConnector, - ) - - return WikipediaSourceConnector diff --git a/unstructured/ingest/runner/writers/__init__.py b/unstructured/ingest/runner/writers/__init__.py deleted file mode 100644 index 8b07adb9e..000000000 --- a/unstructured/ingest/runner/writers/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -import typing as t - -from .astradb import AstraDBWriter -from .azure_cognitive_search import AzureCognitiveSearchWriter -from .base_writer import Writer -from .chroma import ChromaWriter -from .clarifai import ClarifaiWriter -from .databricks_volumes import DatabricksVolumesWriter -from .delta_table import 
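update_download_dir_hash above derives a default per-connector cache directory from the first ten hex digits of the supplied hash. A standalone sketch of the same path computation, simplified under stated assumptions: logging and the preserve_downloads warning are omitted, and the leaf directory is created eagerly here, which the removed code did not do.

import hashlib
from pathlib import Path

def default_download_dir(connector_name: str, seed: str) -> str:
    # First ten hex digits of the hash name the per-connector cache folder.
    hashed = hashlib.sha256(seed.encode("utf-8")).hexdigest()[:10]
    cache_path = Path.home() / ".cache" / "unstructured" / "ingest"
    download_dir = cache_path / connector_name / hashed
    download_dir.mkdir(parents=True, exist_ok=True)
    return str(download_dir)

# e.g. ~/.cache/unstructured/ingest/wikipedia/<10-hex-digit hash>
print(default_download_dir("wikipedia", "Open source"))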
DeltaTableWriter -from .elasticsearch import ElasticsearchWriter -from .fsspec.azure import AzureWriter -from .fsspec.box import BoxWriter -from .fsspec.dropbox import DropboxWriter -from .fsspec.gcs import GcsWriter -from .fsspec.s3 import S3Writer -from .kafka import KafkaWriter -from .mongodb import MongodbWriter -from .opensearch import OpenSearchWriter -from .pinecone import PineconeWriter -from .qdrant import QdrantWriter -from .sql import SqlWriter -from .vectara import VectaraWriter -from .weaviate import WeaviateWriter - -writer_map: t.Dict[str, t.Type[Writer]] = { - "astradb": AstraDBWriter, - "azure": AzureWriter, - "azure_cognitive_search": AzureCognitiveSearchWriter, - "box": BoxWriter, - "chroma": ChromaWriter, - "clarifai": ClarifaiWriter, - "databricks_volumes": DatabricksVolumesWriter, - "delta_table": DeltaTableWriter, - "dropbox": DropboxWriter, - "elasticsearch": ElasticsearchWriter, - "gcs": GcsWriter, - "kafka": KafkaWriter, - "mongodb": MongodbWriter, - "opensearch": OpenSearchWriter, - "pinecone": PineconeWriter, - "qdrant": QdrantWriter, - "s3": S3Writer, - "sql": SqlWriter, - "vectara": VectaraWriter, - "weaviate": WeaviateWriter, -} - -__all__ = ["writer_map"] diff --git a/unstructured/ingest/runner/writers/astradb.py b/unstructured/ingest/runner/writers/astradb.py deleted file mode 100644 index b12ee7234..000000000 --- a/unstructured/ingest/runner/writers/astradb.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.astradb import AstraDBWriteConfig, SimpleAstraDBConfig - - -@dataclass -class AstraDBWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "AstraDBWriteConfig" - connector_config: "SimpleAstraDBConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.astradb import ( - AstraDBDestinationConnector, - ) - - return AstraDBDestinationConnector diff --git a/unstructured/ingest/runner/writers/azure_cognitive_search.py b/unstructured/ingest/runner/writers/azure_cognitive_search.py deleted file mode 100644 index 69204e3f3..000000000 --- a/unstructured/ingest/runner/writers/azure_cognitive_search.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.azure_cognitive_search import ( - AzureCognitiveSearchWriteConfig, - SimpleAzureCognitiveSearchStorageConfig, - ) - - -@dataclass -class AzureCognitiveSearchWriter(Writer): - connector_config: "SimpleAzureCognitiveSearchStorageConfig" - write_config: "AzureCognitiveSearchWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.azure_cognitive_search import ( - AzureCognitiveSearchDestinationConnector, - ) - - return AzureCognitiveSearchDestinationConnector diff --git a/unstructured/ingest/runner/writers/base_writer.py b/unstructured/ingest/runner/writers/base_writer.py deleted file mode 100644 index e28d11b07..000000000 --- a/unstructured/ingest/runner/writers/base_writer.py +++ /dev/null @@ -1,26 +0,0 @@ -import typing as t -from abc import ABC, 
abstractmethod -from dataclasses import dataclass - -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseDestinationConnector, - WriteConfig, -) - - -@dataclass -class Writer(ABC): - connector_config: BaseConnectorConfig - write_config: WriteConfig - - @abstractmethod - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - pass - - def get_connector(self, **kwargs) -> BaseDestinationConnector: - connector_cls = self.get_connector_cls() - return connector_cls( - write_config=self.write_config, - connector_config=self.connector_config, - ) diff --git a/unstructured/ingest/runner/writers/chroma.py b/unstructured/ingest/runner/writers/chroma.py deleted file mode 100644 index e41753d01..000000000 --- a/unstructured/ingest/runner/writers/chroma.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.chroma import ChromaWriteConfig, SimpleChromaConfig - - -@dataclass -class ChromaWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "ChromaWriteConfig" - connector_config: "SimpleChromaConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.chroma import ( - ChromaDestinationConnector, - ) - - return ChromaDestinationConnector diff --git a/unstructured/ingest/runner/writers/clarifai.py b/unstructured/ingest/runner/writers/clarifai.py deleted file mode 100644 index 9742e1eee..000000000 --- a/unstructured/ingest/runner/writers/clarifai.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.clarifai import ClarifaiWriteConfig, SimpleClarifaiConfig - - -@dataclass -class ClarifaiWriter(Writer): - write_config: "ClarifaiWriteConfig" - connector_config: "SimpleClarifaiConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.clarifai import ClarifaiDestinationConnector - - return ClarifaiDestinationConnector diff --git a/unstructured/ingest/runner/writers/databricks_volumes.py b/unstructured/ingest/runner/writers/databricks_volumes.py deleted file mode 100644 index 74703f850..000000000 --- a/unstructured/ingest/runner/writers/databricks_volumes.py +++ /dev/null @@ -1,25 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.databricks_volumes import ( - DatabricksVolumesWriteConfig, - SimpleDatabricksVolumesConfig, - ) - - -@dataclass -class DatabricksVolumesWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "DatabricksVolumesWriteConfig" - connector_config: "SimpleDatabricksVolumesConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.databricks_volumes import ( - DatabricksVolumesDestinationConnector, - ) - - return DatabricksVolumesDestinationConnector diff --git 
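Every destination writer in this package follows the two-method shape of the base class above: hold a connector config and a write config, and defer the heavy connector import to get_connector_cls(). A toy sketch of the pattern with stand-in classes; BaseDestinationConnector and LocalJsonWriter here are illustrative, the real configs live in unstructured.ingest.interfaces.

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Type

class BaseDestinationConnector:  # stand-in for the real interface
    def __init__(self, connector_config, write_config):
        self.connector_config = connector_config
        self.write_config = write_config

@dataclass
class Writer(ABC):
    connector_config: object
    write_config: object

    @abstractmethod
    def get_connector_cls(self) -> Type[BaseDestinationConnector]: ...

    def get_connector(self, **kwargs) -> BaseDestinationConnector:
        # The heavy import happens inside get_connector_cls(), keeping optional
        # dependencies out of the registry module's import path.
        connector_cls = self.get_connector_cls()
        return connector_cls(
            connector_config=self.connector_config,
            write_config=self.write_config,
        )

@dataclass
class LocalJsonWriter(Writer):
    def get_connector_cls(self) -> Type[BaseDestinationConnector]:
        return BaseDestinationConnector  # a real writer imports its connector here

print(type(LocalJsonWriter(connector_config={}, write_config={}).get_connector()).__name__)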
a/unstructured/ingest/runner/writers/delta_table.py b/unstructured/ingest/runner/writers/delta_table.py deleted file mode 100644 index 6337e03d9..000000000 --- a/unstructured/ingest/runner/writers/delta_table.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.delta_table import ( - DeltaTableWriteConfig, - SimpleDeltaTableConfig, - ) - - -@dataclass -class DeltaTableWriter(Writer): - write_config: "DeltaTableWriteConfig" - connector_config: "SimpleDeltaTableConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.delta_table import ( - DeltaTableDestinationConnector, - ) - - return DeltaTableDestinationConnector diff --git a/unstructured/ingest/runner/writers/elasticsearch.py b/unstructured/ingest/runner/writers/elasticsearch.py deleted file mode 100644 index 7ce8b451f..000000000 --- a/unstructured/ingest/runner/writers/elasticsearch.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchWriteConfig, - SimpleElasticsearchConfig, - ) - - -@dataclass -class ElasticsearchWriter(Writer): - connector_config: "SimpleElasticsearchConfig" - write_config: "ElasticsearchWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchDestinationConnector, - ) - - return ElasticsearchDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/__init__.py b/unstructured/ingest/runner/writers/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/runner/writers/fsspec/azure.py b/unstructured/ingest/runner/writers/fsspec/azure.py deleted file mode 100644 index 66835898e..000000000 --- a/unstructured/ingest/runner/writers/fsspec/azure.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.azure import ( - AzureWriteConfig, - SimpleAzureBlobStorageConfig, - ) - - -@dataclass -class AzureWriter(Writer): - connector_config: "SimpleAzureBlobStorageConfig" - write_config: "AzureWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.azure import ( - AzureBlobStorageDestinationConnector, - ) - - return AzureBlobStorageDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/box.py b/unstructured/ingest/runner/writers/fsspec/box.py deleted file mode 100644 index 5f4599a40..000000000 --- a/unstructured/ingest/runner/writers/fsspec/box.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.box import BoxWriteConfig, SimpleBoxConfig - - -@dataclass -class
BoxWriter(Writer): - connector_config: "SimpleBoxConfig" - write_config: "BoxWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.box import ( - BoxDestinationConnector, - ) - - return BoxDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/dropbox.py b/unstructured/ingest/runner/writers/fsspec/dropbox.py deleted file mode 100644 index 0c9389079..000000000 --- a/unstructured/ingest/runner/writers/fsspec/dropbox.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.dropbox import DropboxWriteConfig, SimpleDropboxConfig - - -@dataclass -class DropboxWriter(Writer): - connector_config: "SimpleDropboxConfig" - write_config: "DropboxWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.dropbox import ( - DropboxDestinationConnector, - ) - - return DropboxDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/gcs.py b/unstructured/ingest/runner/writers/fsspec/gcs.py deleted file mode 100644 index 728a109d2..000000000 --- a/unstructured/ingest/runner/writers/fsspec/gcs.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.gcs import GcsWriteConfig, SimpleGcsConfig - - -@dataclass -class GcsWriter(Writer): - connector_config: "SimpleGcsConfig" - write_config: "GcsWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.gcs import GcsDestinationConnector - - return GcsDestinationConnector diff --git a/unstructured/ingest/runner/writers/fsspec/s3.py b/unstructured/ingest/runner/writers/fsspec/s3.py deleted file mode 100644 index 64d2b3131..000000000 --- a/unstructured/ingest/runner/writers/fsspec/s3.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.fsspec.s3 import S3WriteConfig, SimpleS3Config - - -@dataclass -class S3Writer(Writer): - connector_config: "SimpleS3Config" - write_config: "S3WriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.fsspec.s3 import ( - S3DestinationConnector, - ) - - return S3DestinationConnector diff --git a/unstructured/ingest/runner/writers/kafka.py b/unstructured/ingest/runner/writers/kafka.py deleted file mode 100644 index f8e5a3e3d..000000000 --- a/unstructured/ingest/runner/writers/kafka.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.kafka import KafkaWriteConfig, SimpleKafkaConfig - - -@dataclass -class KafkaWriter(Writer): - write_config: "KafkaWriteConfig" - connector_config: "SimpleKafkaConfig" - - def 
get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.kafka import ( - KafkaDestinationConnector, - ) - - return KafkaDestinationConnector diff --git a/unstructured/ingest/runner/writers/mongodb.py b/unstructured/ingest/runner/writers/mongodb.py deleted file mode 100644 index 5798a0161..000000000 --- a/unstructured/ingest/runner/writers/mongodb.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.mongodb import MongoDBWriteConfig, SimpleMongoDBConfig - - -@dataclass -class MongodbWriter(Writer): - write_config: "MongoDBWriteConfig" - connector_config: "SimpleMongoDBConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.mongodb import ( - MongoDBDestinationConnector, - ) - - return MongoDBDestinationConnector diff --git a/unstructured/ingest/runner/writers/opensearch.py b/unstructured/ingest/runner/writers/opensearch.py deleted file mode 100644 index f0c62b578..000000000 --- a/unstructured/ingest/runner/writers/opensearch.py +++ /dev/null @@ -1,26 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.elasticsearch import ( - ElasticsearchWriteConfig, - ) - from unstructured.ingest.connector.opensearch import ( - SimpleOpenSearchConfig, - ) - - -@dataclass -class OpenSearchWriter(Writer): - connector_config: "SimpleOpenSearchConfig" - write_config: "ElasticsearchWriteConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.opensearch import ( - OpenSearchDestinationConnector, - ) - - return OpenSearchDestinationConnector diff --git a/unstructured/ingest/runner/writers/pinecone.py b/unstructured/ingest/runner/writers/pinecone.py deleted file mode 100644 index 86fd9580a..000000000 --- a/unstructured/ingest/runner/writers/pinecone.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.pinecone import PineconeWriteConfig, SimplePineconeConfig - - -@dataclass -class PineconeWriter(Writer): - write_config: "PineconeWriteConfig" - connector_config: "SimplePineconeConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.pinecone import ( - PineconeDestinationConnector, - ) - - return PineconeDestinationConnector diff --git a/unstructured/ingest/runner/writers/qdrant.py b/unstructured/ingest/runner/writers/qdrant.py deleted file mode 100644 index e7e632405..000000000 --- a/unstructured/ingest/runner/writers/qdrant.py +++ /dev/null @@ -1,19 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.qdrant import QdrantWriteConfig, SimpleQdrantConfig - - -@dataclass -class QdrantWriter(Writer): - write_config: "QdrantWriteConfig" -
connector_config: "SimpleQdrantConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.qdrant import QdrantDestinationConnector - - return QdrantDestinationConnector diff --git a/unstructured/ingest/runner/writers/sql.py b/unstructured/ingest/runner/writers/sql.py deleted file mode 100644 index 70c710a1f..000000000 --- a/unstructured/ingest/runner/writers/sql.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.sql import SimpleSqlConfig - from unstructured.ingest.interfaces import WriteConfig - - -@dataclass -class SqlWriter(Writer): - write_config: "WriteConfig" - connector_config: "SimpleSqlConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.sql import ( - SqlDestinationConnector, - ) - - return SqlDestinationConnector diff --git a/unstructured/ingest/runner/writers/vectara.py b/unstructured/ingest/runner/writers/vectara.py deleted file mode 100644 index f29128022..000000000 --- a/unstructured/ingest/runner/writers/vectara.py +++ /dev/null @@ -1,22 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.vectara import SimpleVectaraConfig, VectaraWriteConfig - - -@dataclass -class VectaraWriter(Writer, EnhancedDataClassJsonMixin): - write_config: "VectaraWriteConfig" - connector_config: "SimpleVectaraConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.vectara import ( - VectaraDestinationConnector, - ) - - return VectaraDestinationConnector diff --git a/unstructured/ingest/runner/writers/weaviate.py b/unstructured/ingest/runner/writers/weaviate.py deleted file mode 100644 index 96c7b0071..000000000 --- a/unstructured/ingest/runner/writers/weaviate.py +++ /dev/null @@ -1,21 +0,0 @@ -import typing as t -from dataclasses import dataclass - -from unstructured.ingest.interfaces import BaseDestinationConnector -from unstructured.ingest.runner.writers.base_writer import Writer - -if t.TYPE_CHECKING: - from unstructured.ingest.connector.weaviate import SimpleWeaviateConfig, WeaviateWriteConfig - - -@dataclass -class WeaviateWriter(Writer): - write_config: "WeaviateWriteConfig" - connector_config: "SimpleWeaviateConfig" - - def get_connector_cls(self) -> t.Type[BaseDestinationConnector]: - from unstructured.ingest.connector.weaviate import ( - WeaviateDestinationConnector, - ) - - return WeaviateDestinationConnector diff --git a/unstructured/ingest/utils/__init__.py b/unstructured/ingest/utils/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/utils/compression.py b/unstructured/ingest/utils/compression.py deleted file mode 100644 index 41f4b3240..000000000 --- a/unstructured/ingest/utils/compression.py +++ /dev/null @@ -1,117 +0,0 @@ -import copy -import os -import sys -import tarfile -import zipfile -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional - -from unstructured.ingest.connector.local import 
LocalSourceConnector, SimpleLocalConfig -from unstructured.ingest.interfaces import ( - BaseConnectorConfig, - BaseSingleIngestDoc, - ProcessorConfig, - ReadConfig, -) -from unstructured.ingest.logger import logger - -ZIP_FILE_EXT = [".zip"] -TAR_FILE_EXT = [".tar", ".tar.gz", ".tgz"] - - -def uncompress_file(filename: str, path: Optional[str] = None) -> str: - """ - Takes in a compressed zip or tar file and uncompresses it. - """ - # Create path if it doesn't already exist - if path: - Path(path).mkdir(parents=True, exist_ok=True) - - if any(filename.endswith(ext) for ext in ZIP_FILE_EXT): - return uncompress_zip_file(zip_filename=filename, path=path) - elif any(filename.endswith(ext) for ext in TAR_FILE_EXT): - return uncompress_tar_file(tar_filename=filename, path=path) - else: - raise ValueError( - "filename {} not a recognized compressed extension: {}".format( - filename, - ", ".join(ZIP_FILE_EXT + TAR_FILE_EXT), - ), - ) - - -def uncompress_zip_file(zip_filename: str, path: Optional[str] = None) -> str: - head, tail = os.path.split(zip_filename) - for ext in ZIP_FILE_EXT: - if tail.endswith(ext): - tail = tail[: -(len(ext))] - break - path = path if path else os.path.join(head, f"{tail}-zip-uncompressed") - logger.info(f"extracting zip {zip_filename} -> {path}") - with zipfile.ZipFile(zip_filename) as zfile: - zfile.extractall(path=path) - return path - - -def uncompress_tar_file(tar_filename: str, path: Optional[str] = None) -> str: - head, tail = os.path.split(tar_filename) - for ext in TAR_FILE_EXT: - if tail.endswith(ext): - tail = tail[: -(len(ext))] - break - - path = path if path else os.path.join(head, f"{tail}-tar-uncompressed") - logger.info(f"extracting tar {tar_filename} -> {path}") - with tarfile.open(tar_filename, "r:*") as tfile: - # NOTE(robinson): Mitigate against malicious content being extracted from the tar file. - # This was added in Python 3.12 - # Ref: https://docs.python.org/3/library/tarfile.html#extraction-filters - if sys.version_info >= (3, 12): - tfile.extraction_filter = tarfile.tar_filter - else: - logger.warning( - "Extraction filtering for tar files is available for Python 3.12 and above. " - "Consider upgrading your Python version to improve security. " - "See https://docs.python.org/3/library/tarfile.html#extraction-filters" - ) - tfile.extractall(path=path) - return path - - -@dataclass -class CompressionSourceConnectorMixin: - processor_config: ProcessorConfig - read_config: ReadConfig - connector_config: BaseConnectorConfig - - def process_compressed_doc(self, doc: BaseSingleIngestDoc) -> List[BaseSingleIngestDoc]: - """ - Utility function that helps process compressed files. Extracts the contents and returns - the generated ingest docs via a local source connector - """ - # Download the raw file to local - doc.get_file() - path = uncompress_file(filename=str(doc.filename)) - new_read_configs = copy.copy(self.read_config) - new_process_configs = copy.copy(self.processor_config) - relative_path = path.replace(self.read_config.download_dir, "") - - if self.processor_config.output_dir.endswith(os.sep): - new_process_configs.output_dir = f"{self.processor_config.output_dir}{relative_path}" - else: - new_process_configs.output_dir = ( - f"{self.processor_config.output_dir}{os.sep}{relative_path}" - ) - - local_connector = LocalSourceConnector( - connector_config=SimpleLocalConfig( - input_path=path, - recursive=True, - ), - read_config=new_read_configs, - processor_config=new_process_configs, - ) - logger.info(f"Created local source connector: {local_connector.to_json()}") - local_connector.initialize() - return local_connector.get_ingest_docs() diff --git a/unstructured/ingest/utils/data_prep.py b/unstructured/ingest/utils/data_prep.py deleted file mode 100644 index 722de16e4..000000000 --- a/unstructured/ingest/utils/data_prep.py +++ /dev/null @@ -1,29 +0,0 @@ -import itertools -import json - - -def batch_generator(iterable, batch_size=100): - """A helper function to break an iterable into batches of size batch_size.""" - it = iter(iterable) - chunk = tuple(itertools.islice(it, batch_size)) - while chunk: - yield chunk - chunk = tuple(itertools.islice(it, batch_size)) - - -def generator_batching_wbytes(iterable, batch_size_limit_bytes=15_000_000): - """A helper function to break an iterable into chunks of specified bytes.""" - current_batch, current_batch_size = [], 0 - - for item in iterable: - item_size_bytes = len(json.dumps(item).encode("utf-8")) - - if current_batch_size + item_size_bytes <= batch_size_limit_bytes: - current_batch.append(item) - current_batch_size += item_size_bytes - else: - yield current_batch - current_batch, current_batch_size = [item], item_size_bytes - - if current_batch: - yield current_batch diff --git a/unstructured/ingest/utils/string_and_date_utils.py b/unstructured/ingest/utils/string_and_date_utils.py deleted file mode 100644 index 89f1ca84d..000000000 --- a/unstructured/ingest/utils/string_and_date_utils.py +++ /dev/null @@ -1,39 +0,0 @@ -import json -import typing as t -from datetime import datetime - -from dateutil import parser - - -def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]: - """Helper function that attempts to deserialize a JSON string to a dictionary.""" - try: - return json.loads(json_string) - except json.JSONDecodeError: - # Not necessarily an error if it is a path or malformed JSON - pass - try: - # This is common when single quotes are used instead of double quotes - return json.loads(json_string.replace("'", '"')) - except json.JSONDecodeError: - # Not necessarily an error if it is a path - pass - return json_string - - -def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str: - """ - Ensures that the input value is converted to an ISO format datetime string. - Handles both datetime objects and strings.
- """ - if isinstance(timestamp, datetime): - return timestamp.isoformat() - elif isinstance(timestamp, str): - try: - # Parse the datetime string in various formats - dt = parser.parse(timestamp) - return dt.isoformat() - except ValueError as e: - raise ValueError(f"String '{timestamp}' could not be parsed as a datetime.") from e - else: - raise TypeError(f"Expected input type datetime or str, but got {type(timestamp)}.") diff --git a/unstructured/ingest/utils/table.py b/unstructured/ingest/utils/table.py deleted file mode 100644 index 65fd7b92f..000000000 --- a/unstructured/ingest/utils/table.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t - -import pandas as pd - -from unstructured.staging.base import flatten_dict, get_default_pandas_dtypes - - -def convert_to_pandas_dataframe( - elements_dict: t.List[t.Dict[str, t.Any]], - drop_empty_cols: bool = False, -) -> pd.DataFrame: - # Flatten metadata if it hasn't already been flattened - for d in elements_dict: - if metadata := d.pop("metadata", None): - d.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"])) - - df = pd.DataFrame.from_dict( - elements_dict, - ) - dt = {k: v for k, v in get_default_pandas_dtypes().items() if k in df.columns} - df = df.astype(dt) - if drop_empty_cols: - df.dropna(axis=1, how="all", inplace=True) - return df diff --git a/unstructured/ingest/v2/README.md b/unstructured/ingest/v2/README.md deleted file mode 100644 index f7291aa5a..000000000 --- a/unstructured/ingest/v2/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Ingest -![Project unmaintained](https://img.shields.io/badge/project-unmaintained-red.svg) - -Project has been moved to: [Unstructured Ingest](https://github.com/Unstructured-IO/unstructured-ingest) - -This python module will be removed from this repo in the near future. 
diff --git a/unstructured/ingest/v2/__init__.py b/unstructured/ingest/v2/__init__.py deleted file mode 100644 index 9d48db4f9..000000000 --- a/unstructured/ingest/v2/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from __future__ import annotations diff --git a/unstructured/ingest/v2/assets/pipeline.excalidraw b/unstructured/ingest/v2/assets/pipeline.excalidraw deleted file mode 100644 index d59bc99dd..000000000 --- a/unstructured/ingest/v2/assets/pipeline.excalidraw +++ /dev/null @@ -1,1417 +0,0 @@
[1,417 lines of machine-generated Excalidraw JSON omitted. The deleted diagram showed the v2 ingest pipeline as a vertical flow: Indexing → Downloading → Uncompressing → Partitioning → Chunking → Embedding → Staging → Uploading. Its legend grouped the steps as follows: Indexing, Downloading, and Uncompressing are "Steps associated with getting data from a source and ready for processing"; Partitioning is "Creating structured/enriched content"; Chunking and Embedding are "Reformatting the structured content"; Staging and Uploading are "Steps associated with uploading the final result to a destination". Solid boxes marked a "Required step" (Indexing, Downloading, Partitioning, Uploading); dotted boxes marked an "Optional Step" (Uncompressing, Chunking, Embedding, Staging).]
diff --git a/unstructured/ingest/v2/assets/pipeline.png b/unstructured/ingest/v2/assets/pipeline.png deleted file mode 100644 index 9cfcf64e808faa9d04f22b58f6ec6d75e1f966f0..0000000000000000000000000000000000000000 GIT binary patch
[305,326 bytes of base85-encoded binary data omitted: pipeline.png was the rendered version of the pipeline.excalidraw diagram summarized above.]
zx~q|x#^I9+G#)hqu?Zv;h>TlRouSEIU!j;@c!2MNpdpdcRCH7b{i$c=Pi>nJoR>Q#XzKD6BRfj zI;e~-irzv37Gp#)pfR+aFSAM4P6fZ0_>a*RC)C>|s)BI?t`?aRF$Amyb~+Zfn{Tgo z%MSY`!wnsOeEs|>Vr(zW^5Y#I3hKutA@lgLoB~Q|9aLswxa(lWnZM*eSKiuhJ0u)~g^-_h zhv0)baHRIz$>(`Y&!5+BS5IOk0cZ3Gn_lkEX+He{OSS%W=p0 z?T^l{#)Z2q=#V^8$;O`Zp^*>gTJ8}O0IkA-3%ozo$8`*(ZQY;7btO-aAHW;KFli^b z(!kOb!E^@>*m*fwm&j6Q7|TN2Rf3UIPqZv-t`YfS242x`pt{_t&UY=tF+5C+T^y;a zZ<+kLO62h5c+@n<@ILBwdk?)pcmDOsBtNs&Utmwp8gr-Nuz&BTn*ZCeK3`?O9oL*q zUd`RYWVz;2mu{_@^JW6}(gZm-Ofd@g7If?oWk#p z=X)R2XiBI~jQI8sB4bwg^pN7SNf}msCP4-o&j|{=h5^4T%yY#nXNz%8IGo9&MO23N zhzAtioHF~70zft593{_p{v1lhy%lmg5?r*4hF?8* zngA5k_ME5q&qj6F?CG?}tn@L#m$Ab zyGljCjgFd@KAm#a+wUpy!LFJKHrfHoTc!Et)ofIJ^NUXg81T=!MlKy#^(Q_+b#?y9 zo*E*)l9o3SPAKPCg;!>7K_Yb_p7j&0GF9o{oPZ4?$FIjWgPBYX@X=)zL#wluOec=_ z=!*2fXDPji)3vAaTYq7S2eeTCG~4`igO!}dKmndihid&cY0ezqw`7HQEF*bwGITKS zjqiI?t^=Vp4a<+9{o<9NEFD4{!9-HpqLm;vBo~G9eZ=v)R@qk=Id?0pv|Kp zHR9PblxSG@d+WTMt0m5$J*I}!IN2Ld*4Ogw;-f~c?69~Mn9t4w%q}3fCaK-JcobpT zwm^d>q&9x{YBuGEek)LAYZaX-3jlD$+W=&|lR-h+eOF8iX$uKXNRIEp&pqHVoX~2T zQCkZ=pnZ8OoLk-3%elQ;1zq&YVQlBg2h8J};*6U3Y^z6(7`gVf_xzE)*^m3;=Xq1a@xQ8bJuP*#v78Ws`1oT|pk_iovmW!xn zp=5!Mn4uF?#sy#;KLs)Q5QNv_F*BkJeizVBqvWm}KBfc*+c4eig)0~8$<*Km-M!2= zQmeN*Ay>v2!($586;vVEs|H3H&LNt_et z9Xb`~;etAgU$mq%cSOv=STMvS{ak$-8qR=$0f+nsIW;6?&2btT)JOJtH@xl@Xd~F3 z**GSn8OGeBc()MANmRs!fv*T*%LdM&sy&gAMRig&lbqX?82SB-th>h6sYIhp8Xxy`0*I%bT2=M-KKtC z5+VDRg2q_oRKHyfP2Z^)ka;E?j!1e#Znl2?hS!a4llvcv#rBCBJ~+IvKhZebID%rq z!B!3I2;+Splv~*=)bqf^20Z<4Xs_R+sY1)TEpMdM|G22Vh!6(f^F9Eo?2`XkO84F^ zpEpo4GQ1aTIQdJHTWnYYJ2L=Y!}c;=$TBT(-lO@_c>T26kdX$pUv_wPI1modBo5~8 zA9qOXE(BvMcBo19P40hN?dB% z{Feac;@Adi+dx?J)iVFjTF9hF1|QIj6}9A8U$R3RRZo+$;BR;N~s?eZ%`( zj$`BT$o!Z^+@oQqnu#|>OAMR{nE~%Ap5!3vfAJDI`M|0j0P=vNQ4AhagQGJZoo^10 zd*}*4H~qKMzYa}s-#raP+>_Mba&YHmeEm)7u7CDRKXX6u(digER0Y-|h7@fcogKva?oT20 z6wV$;^XIX!ygxr52!P?xkjRoE$xAgu_TZ8%XDu%voQ&=V-PHh$XJ^^Hqbf^&Yn_TY z1_0Cvr`!gR>~mDn6G*h`5t}?QYi$Mq^2msFy!{D`&|$K)-!D1-#j3wJnbA&iYR)i5+AB@}h_~TqK??xCE^GWqYQ*7K_-p z`=o=i&!M(Pvp>xp67<<^^dPH#C;UKapy_gYm$9(>Dae5WB7OWvL;La$!pF*7tSwCp z3XCQdR#I!re=0e4V-I~yjvI}Jjugmo?H6x@^?ID^;lB{UIGgl;Eem&<;CuHj_%NUb zvoSy&=MQ3KZN$Bs=Ivc0!=^10f;5vR8C;|v=ZGSX5W7(@m{*8U2-(Uck zQMwQ0RDE(e;g^ODAJ)ay9B6)RMu&;$soN~cG&BL5?LR#z@n|f1=XZw>6f1|HnY+f6 zM}tK9a9z9BZ%iVTCAjPgPR%?Q^K$mEB0wZ1OYBIe>n&?p5IL4vL&aPNI^$5jDv#oa zw6#2@Ya836)7gLkG6V8hglLXEN8=HhcHQ1Rca~h*_Q%-|+I8+~Y8!VE{9{B3Nqe8f zCOrG~@Q@EcshZiLLkq%PEJGzt7aM_r<~u)DiA=8+sM}sX?n~0*mqASP*@zX_Z4^d! z3Sd?NBNkIfD$eY88e%)Oi(&qk>uLdX05^(pXARGuXP8puH06v!i+m|Ss#nwn*VtR5fp>+Vb zmyF8Dx%GAIvZ-V^_ZvmC5ou=i(Th54rP3qPChFt`dXe<``NveLiwRZStzNY1A!bcC z7n_Oqc%#Sf@Ts}SJd=A`)rbL4FL=`4vYhUC^r(%l0I$of?^ZkM5q})wmvS~+;kvXN zT)Z+{X9#S|SA|nXAgkxVmi}+0e@z*~%cN{V(B{-48}%lg!tA(u8JAkclck+s34j0> z8qDw!pn%PmJNHSwp`7l;{X-}^CBWSN?Ty8n>FtFsS;}^PrsH#Pu8IHo@*eE8fpgp! 
znTPeXuN-&3)hXa|@&OYjbSXIEQ+BzzaKjx}CJs1j`_1@V0JT7BC(JPs-1PQj+?vxt zi(`l(k|Jp0lvBFHQD79xxhrjS@mK>kqr8VdHeX zy;v_GmL^KDk$@~{=W>@>jCe1PU)a|&fe0#QZlr#$D zvi^d4CyD}|$R#Zn5A>B$S`?5b+VL&Nk0Fc$CdH+KkTD4w4db2~6g zqrW;}u^bh1wv4rW)OE__p8sPdqi9|8jHqt6>5?qy@s$`&QkXKoegm?{TkzJJ$z*xV zNSA3lh3ixon!}dEy^3>CWU;My+ur01D1%{uF!MmX<>y#S2GQj&zWOb)nf+LgcOg}i z`?)0|%CZv>am?1*1KHa{*`I01ps!C}B;x~7EM{Ze@^7slV0OGs-K1@fjIY%wvJ+{h zB=`LvLy;k2cm!G4aN-h@d=uw$IT2)0X;TKxX+~t(H+01Piu|GUeRyH7LJ7r;+!tMf-a_Z*nF#ihh9IoXrGDFXL5LsIM=?&m zY*1M0nn$HIx6vEUUHGpr<9;|MYs5I!Q6WS$NPz5Tw&NQ~!WK(v7oq}N;QHJ{)|1~o zydOQ4zTkA|eTu#NaQnq%`S^3mn&+O7Zv6VrM$f8+BVO3M-|e%RUXpgl4QCz%2_c;d zo};`h0#QnuN~3M}2iZZg5u0}+PS^`l6*77LOcF0k(UHkw<@8O`c6Hq|Qq~>G*>eb& zi+x4Icx5MAwkp7%cH^<-<=y=OeogF|{DB-qfn!WUWHl7Y2^*^xRcs)dOJG+06QkOP zNRe0{h>p`oH|{3*ObL}DWw%;I4EUc^D17eC*k@Q5m*xY3?%O*GjG#c%A!(LZeIto6 zZ>4-2xQ+pDa^{JPI)!gh?O;Xc_-y}fyf zd|>*jWy+7xAHpU@1zwsasJKTpQ$nQ9vB_E;m#;XB>PAM#g+>uXR@XhXkXjeihSjXr z_Ce7>dnjYzr6q!v0>I* zl|zs=l(YZgEg(6w zo3!`jJ9d7k8=)T)f7)qF!1k_(k^XBI{YUyQT0hV}^kt5H6ZJ~1 zYWre5Lid0#<=_2W<@x7iApqu#eh#XJvrff#$( z54}DfRg;nX0KqaZ8wc<|nZnkDU1@e!ul&Qn)}+-kXdrVC0ozkCufK01HM*iX+>Sy{#Ol0N5WzD(Z8;+QNC6K|TdwEPrgxPP5Fw|22|%Dz zQfr@rkye=wDO1mL%9T z#-EX#;FvC+N1Y#M*p+ZSIzYKn+2LY#;;7RWbvvHu4Cfr2jf!(W!Mz8f|CK0Q-5bpkgHHj{=`W|9aB)fBLDLypUh|0Fdzj;kiK=D`%I0DNV7=ZRjzmV(|_y+FRd6TwP5HC!W#K$`=--J zd}fKSVGsFbcdw74nN7QqI;l9jfuvhtHih(~o5XC*5PxAU)&6x7mnKH|qbRbO?<8<% zv(NV3c%O)*_FBp|OX`WH4VLs%efDg$lE&n$jVv5M0TnE3ojq?h20@um+#fe`JB1W) z#6=M9mh3~TbWZnu23tdbrZxWd&oI#Nwa%wIf|%rC)SmZX&_Jmjz zqx$OAz~jw-<(=h)N0D7&*}&HU8*8J1l%iBS&Mi*3Uj}Z#Q2Bmi@S~dXC*wyG+^7!{ zBlK*xl9p0W6|?=7txgm4u+8ndCD(qUgNc|@pJ`9vcT4f_W^~`z3uD@<<*9jCkaEO2gtEwU+tH>(NUZZ%3nX$GBnp*e zaUG{r(jX_9K7u|M880lT)I9Ua!{<|NpIMM2ZX3s+Vi^@^WWlRFAr;86wn$0&pHTxH1lNhoTSQ?i6mam7?;2~IILEGwwrImBA=N((15Le z)**i5E&&=4h`4Xy#bJ?{B>=MAR5HE!Qy??!BHK}q4*F*M9{)3Aooexf41Yj~V9m7q zWLcdVMPBjTZ{AjnU6zpVa-Qj`IA0dX6Q@T6eQw29vmVruAaWRNf(*dy~0 zh>X?GF?RZS?Tj-E67$nRgjR}ye#?0 z)PG3)irswW9qfrfM0l*!XQ@%Pme+ofoI|-H(({4i_6{PaB1Ikv;TkJcD3qAg{B2KVi(A72jIo^KkMYQiK&TfK6z!C^gL zwl2hFveY|#62dt75B4VD(d!Z}Usf)KuDVTJ2bHMrWAa-51_n==^jZ%qh)^?uHaw~k zbQ*#byXUEZ9Dr0V-!TA2li}B~D*sZEhjw+h2qRw7s_FII?sznVy0MAj<03Zc5*}|= z6Jmmnu@(%8Kpn8)ih;=cO4^fhzpaOdPi$}8;4bCzelP{e#{J zy1QQ@K|9~=Zc1J4OX}j>3HNV>z9D6lb+mg)-Ab|_vYFKCgR0p6JovJ{-^nyf;X%2m zWNa5(=TJcJ+aAmY11)-SS8u)^rNr^Lo2VLXIWa0HaoawT)M-o6GV%J zq_g@A2#CB2+D-t}(oT*u>dY!Wz4h#Hnc`7yj(s<|P7)V3%aRyjN`9`o)KzQ?i=8K5 zVgP>$G%q@4MQ5SDme*drtxn%On1ocz1c?)SS_NqsBjMn5Y&)XulUl1>h!hLeJnoYU zL_k|sIG<80N-OO?HxaA0!qRWYT>6Rida88Y!fPIHC^4cK%r$<*OmPoHzx^P@CBmfJ z9^QT~1Z!t~0daF*)aP(&n)_jIHuoLZ$=YMuO;@gx0{`*eABqoHnT@667iKo~z-XE* z$O^n=esSuaR_*f$^ala0(i8+almkEEfHjrlzIrjCeCEUb)>d0*1ux{4?Z#_19+?>W zxB<`SP3XpM%$d?@ybZq=Ub0V6GKBDO-+lCN-~8zk7ei zWk1DWi9J;&roaT&Q=6iCrBkWj8L)3XjR% zkeS$)Oq?-I_KUqLOk>+Hl4K!67CFtdYHWpjeps3wFw_(Zqhb()ziIep*Y~Pmo1!!w z&O>|X)lAD%4m;K_5(@u<%O46iTohz4m9=2%naOO-f8+C{W>@no2E1V}n8Nvj0o%n5Cd+X$ifYMjIGtu)*J zV*MG;4+t4&x`2B7Oojl&O#MS+9LWM&oT{z76pCxr-aJtPoNpaqT|s@%7WRscn9f#D z2%PxANP}&}>~ZRzUqv6-kW;(#iV?g@V%mXXjHVEn*W!~BykfPFlzwmasfEX&BIF_4 zi2(B0mgcFve9F4QxV4!&%O8E1O|;~}d&j#!k54$-XEoIP^`E>LcTU*HdRinP=(SDf z6WAS9i}M2Q&}!jVAu_abV<;qp7ta(l`+_FRGf})Lf8!=g_hzhYDrU7(?6z>_Xsj#7 zI?7&*t=)c{L@8R?9;5yJ$A*hl2l!h&dQ$U*Rk*#(bl8RcaKXatCS#(v;Oyu$vGQ9P zyBg`CO@NHv+B6L%cKHhD|D^FI^*FOyKQrSc| z*Ab9$hU}*4Nb46p&%UQrAOm|eR^f!SGPA{wVu>*q?2m)>cM&dQ{}@i;(qk0X2H&pJDGEtNAuN zd}pIfi)Qy<=PMJDDrOjW^tE) zNls07=70Ojh<(6IKz0+xN8_)T;2yICjh3(<*xmw3CLV{xQ!0 z9-a6=D9&ZstBHpnG2mdcn3k_JXfBsItFJgj2+H<{-c*Q#z4u(Yl^r>`cD2Z0(gH1g 
zBBMrx`sF{P_BrfAl-_+jp7_0`NS$seJ3<=Id=^2TY4CjyKUkO#{sFSI@Wi(H`s4*f zP{*aaoIGr*EsPH9aBR((LWg7EQ#_~Dho0aB`d!6J!&m0_u-fJU!BL}5BI~y@2^Fa5 zH-UfwizjPV-{j0mwZ=IZA3F@4w{&|bS}>k%raMaMVH?1zKhpdXxatJu94Uca7Mx-G zkprwy`~sFCsqyi;!Y z!>wih%EXJ&iNwmxCa%rP1Yl>8ou&3j3yB#8m1w)8>Dt|jAk~LD?KTlt0(j-nh;a5B z$^OI!|HI7L%3-G$ZtcBFUW*K2t#Dmt#lUFQ2N3U-7#=Sy!;9WHVD7)*_k7eYH^A}e^w3|BSZjKA1ISU4+0`mF?Q7>Rs}+u&NoU#Cs)1%X$G9+J z5+>688vTbhA%Ebu&J=B>*u;15CcrLqes<-wr%mJ`P&03$9Ut@;m_#nJN@qy)s541c|F{g?EFZyHkD<6)Q7XiqDACy5BO#eFOdcdU zYW9aSYo%`mMF2H3JZkl|R6I+&)ig{sBH{NvNw4K)gAg%W8qF2^hGbHM%zH}FN{COg z)wu6^1b1(0ug_5}lP>xow1#uYlHr#(|H{jL5y;k)g5;RUy?W2`ehyTej)`{tI!L?s z7Vi2%D$c_q&&iJl2*;mVLTex&tre&k^D3squyIQDOm%AlOO;-c$zMf@9 ztKC02*fIy$%|&V*lz&p15kf8z3B6d>S-&;Y``#c|o`XZ4d2=^SPq4k8JF=g_^_QnB zpYQfyajeCqPC52Xmcg&N(N}}?O=WhT|Etx?QJ>Ae zz9GXn?qQTVc>GUqV5)~fl1~rK)VP?XW^hcs)n`_LkO~&+@IkJ^&K~~etSmVvs^@t3 zaG!cWq=8|$lT%b#*I4Iw$M!eHzaI}sB}Vv`XXbfA^#Nlke+o^Y1eO1eTEG^?iqVcA3aLFf?GqeD*uVt)6G z+{+#qy#8P${T(ynNf4=4S?z5l+1$xOHss^Q@5L`LbwFg1)*1Nnb{dXVNNQol=PyW^6zgTaneREb+RBZLVrxBJL9;WcA!sR%`v3dLP17t>b+TpzjA=G z91`mh=n|)O3^R6<&S!|H2R`oR7Wb?%Yj@FG2bf}d%WPI>?~^4{QZP5IeDTX#D+*b( zSj2%+LG|{FhLNO<8PPqNOAkQCml21ot5rZP7Q~qc%mvJwuPFd?fO| z9$b`$xZ<-<#rfCI$%bB)7Kb0aK9>l8+*Ph|EA;j=9+gOwL1wTK6&P~}yUoxM5WmE+ zcy{#DarQ&*UPSLJCA-h>{LLAWA9T472C8jKS1uhd27`%Z#Cmx|X6d6JIROLxFiT?! z2tu0%)!-iWcvW%_IC2P)&fK;HXhoXvTro#EKE3LG8HWO|&eE&&<42oQVuSAR(Tyu2 zWpxxZOE`J|SJEiVZA{L4_{6Qm#T}@4VG1UQcv&l2(}7)m7gNz)jlo7B>muatj6Be| zwUMW-V@BFc#UG86PkwN(;B4rOPH+tj={`2(!svfz+|fA3x}eqR+4{K9#P!Q^Nt-*J zPwn&3!u7|C3T9r{qR07JoGooUY$u!=3@7gkX9_0n>uIWG(xBoD8ww9H;{I_X#>+90 z>G>wlmuxB>tyo9!5nS!U3E5M!jRG+)bv343Z1?yO?xX93# zX?4eZId{u(scLQ1$jzJCQ}l9Kq%g1oE?699Rig;=J?*=_tycd`@vXLEBk7nigKy|wqA#|lgr>O%{ii!QHd zdacsiGZQ@@e_`r$eYv4@-vwj2JF0BLPr0WW*BnF2wcT=F+Pp1q{4##j`a2I)rf6qC z=@`2Bk+4@dnedT*n3}e$xBZnBpCUm*{#eD6YI_%&r7<4m6Jw|X5x)~qTTSIV-O;zqOj5=j7XXH;iEFEDpcEYi`~@ELFD&)(~bFGZo3i(R(3b{?y_C` zCvjP%+Z7s^j#d$^Og=h6B{1Y=4wII4xY#sot-Pb$RhrQ=RZ=N8={vQ&P#F}VmzkSn zR;|Rx=ux6J7CucGvv_loYc;cB;8h8I6e}Qd{P1oRCyIlw!D6+gov>;eaD5yk0x>aX zi0;hGy(gC)p*K0t5N^D6H20JN9B)$Pmu;7IW>`CZito-VZ(^lcctj zxl(vxLka&aRaiNoEvT$sA~6Dq_->BwvO!n}g1JRgj~{4Ntwe0a6OiSfBC+)Se-4glpVz@^q}HrtVclug%h3Vq~fQkkQi~m zixd|6zGtN`pFDGb24nn7>EjMo79^szZVl6U%HL&t;@AeoB^rGlL6Z?m)Ap+hu!T^< zY+`$YUNO4p@&r+bnJGT<6#bbmwP;1l@*Ete*M%VoiMJMw>ke_`9lCMR%h}?PpTXiX z=~|E|7!cpC9Qs%mqV(N30iW{$Da?|57ro%mVn_ioW?hTs)vfS+3|5piT;Egma0*^lhDPa}v9)8~I+YZ5JTB#10wMvHE|S!yhGIlG5! 
z>VR=Cl%xDOHWOHzz<&HTaq!jRhVutaqBHO)CGkG_+scK>EsqPO5fJ$(E6{+Mk5S1U zvMem~IjZaQukvL@cr;A{`08X%_)=Ec`5RNuY3Cm zzN0-|IbJ-=R#E!y30&U!nXdzVQwtt=3`tncb-Xahw)m|waH@UOmH->5vz%nX@SH3&>~~OIk&ibwEr$Q zDm6!hM&0Y1O6(Y}j!Ofi&#QY%cE73lIy0A8NGX45F4&~MsgF8k<(FbTN3oRjR6g-m z z7&D^}(;_LR%CShjQ~MxB+9ma8J>TDGtzoC_vAg??%J~8*j@mcV|h-kUrzF~v6nW!&cY5~UnHN{*EICo}G!#`+E zE`UsIJ}DEKr0Hn?(1_=B=vqG!pE+6actx$&XA8pfuOf&i#X2|>$~i9=37K}-mq;=TJrOn>OVFW=#7bZ$ zk1VsM%7!>nr7*kOs^)v9{XFDN@?-8DGwKfe9EsnKMPVMQGh-jWn+aVcrJAvi@j(AHnT<87Dmr^1pNe*ZSU+I*r$8Ghhgb3}+Kt4K3_*qR02O?8TV|@>VY+ zQrq?d;ml)&cKLCa+;2I9)!)a2{CCZtfVMotaZ-OuPV>&IMrwBI$64>&s$4ko_Np9y z*`14dw4*SUSgh+SU!LhvF`QPro}bBKE)(rv>0x5d7D>o;BiLz0j^&^j;}R8WVkzYV z6<$el%rGCczc}@5=*26HBGSx1U5TVP*m=^WQaE3 z4b4J97UDuPEDV1sMdiW-4bLbJh(bQAlq9O&NCe;;oAj*iAn_$s#%?&a*gvkZOoSXx zwM-fAx>RQcGmSck6eXP;w^uS!?drGJb3t|qRacRjvHX!w`k0z_sn6z2Xvc-bFNm!2 zsfeE`9uvLv`vQ=p?wPF_$mpnBaWHA{>hY`od&TkMQaR@Lp}rYRliPG;$4|p#eH-_WR$fuOM!fE z8qaMup=EZlg$a!XDiMwo?u03Iv=WJuSk7qVr(&8PjHyzU{Qj3ru_FRG0g>g8eG{8Y zrd0c#O7fWd zkjr?+u&7kzzWJ_ikbBnAq2SK%|M8x`ZFkgU6msgZG6`6u+75MsjbdyFxFbXzU$bZJ zD9vI_-S!2J|3q#lb=v)N?*X`V6%X8*EC|Py%$`(#dtWbjFy0IF9k>Z8`Euf`pp=2^ zFvpJ5BDP{+@~Uk}Vxj#=#W*ZVJp+Nt>(*_)7!Ksjwtr)FwN!U@ucoUCEYH_2X4?&O ze)_3390+8-MN_~B^yMYsx3yc?zTb-PrT|B@Vu2nU^N`iIGp&q!Q#u@=7|Uw0i3u%f z-ag92`?H;-3E?7${PoN;qkI<^G)lucltfK54+ND8c-}TqY&kThRsCsGHeDGmQ-rnf z*b4hZqDvz~EAvxRf`<`qQAoM+bU6mWKMQU_iEqcXsu{-O=ia$*IpA3EKW%Cwl7zFE zf|ETRMOk?-1^{VUJjrRKf+~|1INY9JPD;6Wn9g^8NP#7Lm8DRVyxA0oU@>0BS7Ov) zNcmK`dB)hZ5BN1r<`d7&c|{pkA+qgo$+JH2BrmQ71t&!A&?f^0Tzxz$5|h3;TZDRn z+JV0s@Yml30Nm2F)q^3U3>!JqQe546TbE;7b$XcZSJ7b%P83#$bJwqjqT(gS|Jb+5 z@?Sx8a1wP+&Ugk#rJ(G&_yV0izL|v5n6Bh<1;QmWs1V6+J zmP%vinM(8G5X`B`qCR~6Lw`0>w6Qj2FBx&lw{m=Uhv;EZZhYOi^<7>v9rk{F!8fiR z19!>t;GvQeq86B44c&@~nFH>WDYjfvZqh>}tjWFD1Y*5@?PR^}C;#kFJ0C`vJIXz? 
zU)T910k&JT7Je;|<2E^BzxQ~zfQu+pgX2~TA^iUJ!5dD}3}H#xB*NB^mZTftOP{O0 zg`lVix!JsQ{KaZ0276pBBVzQuV9;Gko4%_MLr&u7KlW!#8nnrJ`5p zg+hvF;j$#Z$?TFJPdxVSM1|nNjWFWK|qo@rNz7Z8f|zFBgMJ zjb@9x%I$N$0V!)|k1}-ILewS77^EnTZvxw@0!$CqdCci@g2VR5y4s&=kh5^Tn6Gh(3z`PGVhb*)G7Qmd|{)EI%!#B zrLG%O&#NH!!f!F3fVuSlLXkGA%xZxh;o!{q{2NY^fVrbf3I8;-BC~tVvt6=)1tDlC z&nr^1z|&}bp?g!?<8SzeiA)+o3aSi0vAV7b!;Iq?Oh5;Xb`>I+gLe!N5Z%|!NCy`L$_ z@s}XIw-mbjqQfx4%M8RR?}Uh?0L^q)KUyf?l1e>ZN#cYf@u|#tAY5IGa3Xg?W7tudRkPJkO(OO`j{PH1us#NmAf)r z3QbU**3vU_3ve7h*}p=G(SVK?!E!1I(^I`ZPbpo!Gn{zK%^l_CerBMNx@BdAu0)KA z+ZylvN^>0PJZB+*=F#lqoQfWz(vG{wqx{D2h67K{o8mTS9Avk`&*AnyRL0akAX}=s zx`)KzDiW}@=0#Jbrp$;3V0C-f_Q>UIb}Q}{a6gl=h+_=tk)%^P0f~x0N7XBR8xmWV zmNfa`wosf)km&Cz+nrP~E}~spx~Ydz0Sb0Ryc1LL21ewD@?#U-VG;a9Isz)i(850YjqEhF{Y(9C&rS!M!s*=l?3Qt_^OzDZmA;I7za zFYjC{JLr~dZ|b^UqH(9L1Ha9&f8}942Hf4otN8y?JXnG{bj5S$Yb2yK#t4Q1(NX)+ zKX6!ifypc-t@ry6RL2P&z-zd)VI)NlS-j;#D4oR2&N}ZwL`^;(z{X!j zkHVSE9(3wKxbX}-*Uj6l0Xu7px{iG}%iasooju+#?5w*RL3=KM66m^M%R(gm@whr{ z%>gjfGj```>tRPr;$V+Cf9N2SB8KV@K9y$0gB{0uG_a9=e?={yLgBUbyhFlvUhFuV z(|y51>Uz%+`7N6yAc;0XE2A7{H`bmz0LJf2$IjWa{T`0D)1mp4*I?04jy|HLOg~q8 z?b0@?(HVMGaHUU-jbld=^_7c=K_~PVpsTc2e8WeXbbmaJtr!fc$vQGtR8Kr6su_Yv zvKJH=L6jC6-C=EF?`^)Oua=*MNSxQ;ZKitANm6}lhvNOD7Ic!3VaDj1v{AB2U-jcI z_Q>MLpo1B^$$yJ@Rh1bM&wD(ktsLN}Q$1F1w6z_-pWHXHh0=zFm!8pT_paN*5TdYm zLnpwE3=kMqi8qNdQV^T2lo|geAEBL|DXF^tY!$NWf_>4L`4%{<}`3#x#d`P0KbQjK=+ofKqXi{%UR)RzuB zi4>VF`13R0ec;SDt6{}sW>~{{2Z`<%$L&hhdz_Bu<|B<%yqZR8pRm5DLUfnOQ0>rA zj*G1r21$j4U=)*Gmka96hyz53ziYHP6dagvN(>a)nH6K#hO~?{P1HvzzZqd~FU&zC z?xG$K8S#tl#R@8g@7HyD&Sb^oaq!elCfA#VhZR}a<qUTac3Uw6gM@~dimRl zkvdkQ8}Ly1Y6P;EarlITYL>60l}rP>V&6ZL7F8deQ99J^MCa ze^O(yRxq5pTi^7p+`z8=j+jFsDDfjU4z#DPz%f#!q~hMWf!4VinCGw9wT-@&yrr+z z?d%zKUDT0c?yX6$JDS)t;TFt0jhrOc`h`A5;;?C241L(Cc#%7s2eQ@Z#SfC-zjsV< zey)$4KF*`>)GxzbWOhj<#n#dUfZF;R6hESwzN+u$RC5@ zd~7Bs&Sv!4x`&O53Pt3&+IP$l(8kh$k`5EgA&0idY9PI;7cJBt5E*Vy<-PT>{w`R- zJzDI6lOA0F*MvGXR&(RmFD{Bv8)da0D4x)hH>Lhe3XNit$e_d=k}&Q5BGtHi1Uuy7xda`aPT+e;g6r84!`cW z&Xd-HX2=iM%D^= z^eX6Tc_uUmxoTO+(X_qJ-&Edh{G@=u>h5!iid*bB9H_*B#+pT@>zyfs`x|FEhfxGp zBoCQ1OLkp#rv5l?88hupHlMe9xupGoEmZBOg>$s0ej}U%`_@=jrPjMF>%ia%fTSUOGnOEY=3FH}t(Wn?!hUN5+dIXE9BRm{Y*^F1k zafP}LBKvG-(*&nCmwpOy_*sed%EQy@3ijqYFcG5a&8Vz938YqLw{+^9m7GYvO2mET)PluYb1R*IhWp)j%jn^sk%wQLzFguJ=z{$?pGmOKT)}-#Rc=>ZJ`q#`tmK>md#<)j3P8)A>ND=<}kQ{Bt}h1A+GJM2jHABl2O1<-RHR&ofmjs zWWj?T!71C6R9cE!Pa1oO$xNv2r_Fx%4l8k_z0`2@`d&C^;o<_yiOGp9Z^2l2)Zg|nv*oGlEV$BQktz~M zGD1Xyb_?%JrQ%x9^36UWLxz=h=YIE;5BBkxd60N4ZTan`6bjiwY%$I`E{+7r+*~-} z^SRl#F5`~EFQvFN^)v|HVq*tTzvvZ(a^1EO0IjvJ0bY@= zF8A88+Y9fU2m8?{o!I3&4CmR=!yIb!$6qYkb}en&?xo+rf3ky3>vxy`qM=)&!Lub@ z+KZYBFJGUn|8(yCbqm(4ik&v2e8^gi0PKpP$Ym1~B^+y3ckVrXAyZxJeLD4>b|CMq@0=^vW$ z@3-ji_rWF77niC)IzHIs^_S&arn2RRFiN^G&7R*S9B-KR{C+3`mX?vJ>Uwl!VwGGp z$Z#xgXg2}g*75{zs-Z3DmW9szJ}+DzjeIvqLLF9Y8jTCVOBTWsZ8;&p={7=PAGKb} zDCw{0QE=R6aJQhMI19ixpgI1Mysk2f%Rd`yAK=Alt)NzsY%*YUb=I-+rE^ur+Qp_5 zczSTvd{bq*V&EG-n_+yR`$2-`^5sKV=8uN~B`$NP2;atw@NLN#h=;WJR$d!a;bK9) z@q~DETmyVR67IoL!YiknlEt5USTPpHv*OKu^Yn@Vvd+zS*j8zW^OTWRZ39%e&LXh3 zALpG=By+?=0d*B1-De07F2K$XPHDKL5mm$I!pM z>b3nbRIx4ws z5hf{aC_X45`DVv3jKbF>@OfDopHae!GJ~x-B4a+xSyFj^?YZ7qZKw;lb>o zd!7~YA3FvuvSv-cSR_+(2(jbPvIo{rrs*4+iaycV6Mc-63JDZJNm7Yf9f=BFdkyO2 z2l(jsUxUq>in1`pcd%KzWGZ7FA0~|*y=!@h^oZGMW9R|74L{w}p_o@K%Y{^E`e=l0 z^iI(C^P_HY+-bSRF9aJWv=`1Fy0Yq!WU#G;@MPJPK=7u8e>|AZFmRQLUe!Vk6SiTT zol_&9A&fZmu4-~Ws&Vth= znq5auRWe(3aoWo^SO$bt@FV&TzfaxHR5naHUtU;MtJ6O0e&nETciu&NrwWgRn`gqO z#lx24F}FZB(Vf-0q==Hr4T$fsyz0>By-JIhnU?Ct@X%{5qw??#fplzgG;=oW^8IAQ z3CkDeW8-D^D=%D@RGSEf=0GB>2a2xI25?e 
zj&sIl<`OF%v)-%8{FvtWs|J}vT5oKlzuqUl3qtc5HLG?7HxJ1iklOq+(q-4j>zZd- zQSYJ5K7HJ{^~9l+ju!K}4wo8FAc0k6+M@9-qS;4N#`mww$S%&QGs*_Apcyn7s}}UU zepPyRN8fbn>#_I*!D9q~i+}8sqrr z+0(>OJ-k{%ay^nFytW?Qm^)k&ePMo%!@a{_@uyhcPC+Yru9%WjfNZDMrDp=h^13Ku z6>#>D%IG}ah9)YirIphG7Q$Y3Z1%2j$L@QmO|mCW>T-(l72QFPNRCH(Qd}%V79AH? zZ^PFGE96M%(4EyDVmE_F`R^L54Zbws#xZ0rYD_T+AoVeP$Jx03cx;9?2dv}T@rfAYp(b~OCfc2`{VA2kxP(aWSDcip8t4LT z#h9_hj%%Lx51VzCM^@M{cGc;t^M`33BivadaA(tur?XExO^n9#Ms2h1ZIcC%6oqYL zussM$4?w}IrgA^qRwiI-y+uf0_sRYvug6tlqly7T%vu58Pdxt{I%^JUPLtknGJv>*is!WUQ}>4BT?qFcg5+U-xRkWlQ;@Q|QVz#Pe#rM-FW;vPf{N zQ=!dtI>?l|cce_?pt{&W{KeZ{vV0xDOAhf6^w{n?USGHi34JqRDOZdo>2gQ6Ckb`0U1lvUm++@fKts6*2UA zrnxIz)p)7akF&FS>C7KmslWe7tmo1!`e>A0+wffy`nK}AOzOg|)06u_*l}KW8YK8x zhqkMp+=V4$RiaWk(98y%ZsiBch}E_0yRbU1W?M{c==djL<=xz$5uCkjPNqc(ihIt& z6l7X*!&ciJ;KaBchGr+9h@jX%p*Hf7jYGLZV65;$@fH?`7l;jL?r$^_p}d72OG_I6&Ldc6}nX1&9COJf=_+cT-b1l zrU3eA7Ev$U!$(Og!L{f+w7%#f{1n1D)Fyd3tBJ3#nVw-rj`qUlaa+hzL()y`)h`*u zR5GX$CWNkqnPny$^~z(*t8uq7Go9mbsJgdSnz)JW=$2?tY%L_}`ER4r^eY`YC=ACK z6pn+_N@AtnbilpXKg~F1oQd}TwVM{X<*7zek6zGwSC0g;k-$*1I=o+z#99t_jeGK> z&~F%Dwt)*1OMdU+6J;O57QcnnFNSE=`AwMMUohXz*_-SMGZ0iOu7DogxXO7-_)`>& zGq&^~j{8OAAJam1hT<7w^Wwzk!#SmoB%6@<*Fmy$oGW>jY*5XGyNyWym)A#e1gn_+6-D1{Y`N(3zclQii6spprKE+ z*vI1X+$_tpI0~~&nEQ zIWw3Xw3R0vtHhE1$x)UK_Gmo@qY*ENE?{H%Tk1(Qlce5%4|lcipj%SYLALqb~V zu9ou0@|G4+BSgAR-%}gfH4{qubb=5|OdPsVs10)SU6rn=0NjG^&jOdHdI12rTf2V4 zp|wWYH4Xp^<)@YJVhewLq_+@*mU5MWo9)}e$70M{zttg4t9S>c9{`F?*p83(xbMz& zfM&aM#Ru2t>bpwp!U_(b+ZB{k2e?*|gnJRK!{)b`TEp2bb7c^0!qgos; zisE|jp3jaSsaQ68Wd&DvJO*V+#oK|_pAUYFAHgikcw~f}g)hrPi(A4a^T$&}qioBz zrOMwnl2^rPh?-Zfn6s1NZWGdF@$APEX9>Vn!x-eS!{}wmZuF!ShWn5Lu*`$79viWw z>v&q(fF`IPZ##r;o|PxA_NG_N|78W-6;57;)6K#S_$+tt7!}=ScS4(~ink8(;bxYs z=&smBFd*=g?&vH@-~9Tn29z4%BN!=3>)S^vT!k6u-*Qw;g>?m@*)6}Af)WEOw4^b& zkf}cjv`4zgU55ugnP_j!uV!RYoX6lK_1c4?_oO*}D*=EqJ1HuSWD*rQ)!Lt|-A;B& zi(eQCuKXOfe6MEJC+6@^pc*2PVF8hF8bz7%)I$vN=b_e7fU3oxkp`s-_BB9WdWdz6 zrVy*i7XbGd(TSgsWQh@}-|he{jU_CIlW0?@APBq(U6M<5aC7P_m1n&RW? 
ztu8iDKk4+1EQp?hGxe;2K?o1rv+(VG+Bd7ps@j8glk44(ThKELi3=5-uBmPG>}dRS4m6%=091`-i5e(4W^8`Hk1k7~Th;WK zVC{xwqLdUB`7*8nC%y%iTX44L4?CpvYzDx@kraR&CA z2gqP>C1LizO$Grjn2Ztat0glI^=(q|Z*#Y_2AMf`a~doAEuC*m^DZ_`Fz>l>j-0cw zoDWwW6j=|cn_wwlhuv{tfoV#)AML{!sfENxgWrM9Mqf-b+eg2{4&T$C@(n9 z!E-6fgAoIq*&AqAHf*l;$rBZLiU^Nk0tWT7-(_$M(8uqG--_&=dq?a~9#_#mCc0S0 zDWS%>QM~!MIas%7do+8e|8y|GHcXQGY!dFqO<~i)A?mmZHrD!}Pyo6|YWxR*)ccu- z;vNE6P=9_nov4K5$*M2C1j>bzFebs_l|F(XdSdmYlKQws;(9?{O6dTofH&sZ>k;)g zb6C%7_5)0lvZN*ij}$CL#hsD}6^(mZ!1Sd=1t|Z*dZ)ar0b5ye+@AEnt-j9npcLqV zw=VgHjrX*lLH6}?Gd3_aLr%&25kHgUI1yQ+x5!>6^4XesOL&n{Lxu?PYOWF`+5uHSU zy{tjuufMu+^qu2V9JeiR>j4D`EbV!5xl3DLVjTzCEC5-k$FKu?GoPa23ti7kCRoMI zsojLb8`=hUTQAHdq;CI-j`Bcxi#Fl796t-d5DPus)PV3)544dc_X^!-S4$7F_GEgL zlR+IoD#r%2DXi}b3xDMT{H2oeq=1S*Fc5=aT!U8;$_bh5dz)carQNOSQ;8yhztU_^A4~6IN z5a$&L!`YfGvzmthCt-pZYNm%1@l13CD6s_k73eSMwm7$^C^qkH-rFD2K&Aw<1yGZe z^1hTTyRl_)b?*?*)jepX$dw_I6&DqSpU!LVE%04BA2vH}PM-O20$%Nxz5rO?YD+|JdvAJPy9qn!)mP#@vdCLBzTy*njd9lfb^%d4*15DNsXjq{9OYQ0FT9BUHD)n&#_YCcHFtN_!bhn(fITOpS~}__ zKM)cFq7V_rcK|@jI$8b=f*g>O_PlXn4CHs04ixDOXd-X;eG`A`&))t^b8h!cqW~I4 zBG#@Jput>lcJRyxooic5aR+%E@bWj(;H{`Sdm?8&S{3_^%m8F|P0e+Jhe!t5Rdxo{ zZ$5jBF9^)evuCOrrl|;e%7J4BOTr)W^{R=rsv-q=Ycgz*jiyfpn_G77t0HFUJAOdr zy4PzBfMvx51CYXA@!0`Ixs9_&Q@=={w|gESJ3ya&3w%FlUVCqs*~Zj;e(yT{F?B!06Oz$>YP$6QqJ}oTk{>6Gsd0*ob z-;LF889OEaKomEm&aYVV=WzZ%R{lk3&3vdZ)lmshc|Tz4R9O-4ZDIy`Xi)q;8v)V& z!x(;z1V+HjG3FOZ3cT;HKmPt}jUK`WlWZ8?`*{l@)A#|`)aIK*aTL>KH%?z`iu@Gr zpWpub7yR+HZ$tsAl6&vBB5@Ey*UUjFdPPL9M8=E$v`&AS+W+%S045EFL8LLt_p?EO zClz>U@M??Dme7s_m;f zs{xespJQczkBHt33)El!eChwl?XR2i!~i15L`qiw>!UQDd9py+)ZkZ!e%sgl_4c10 z6EUnHZE&|umgAZ>KdK`!#QFh$T&}*?9lwlK$J}eva@8 z`koiURmmUxYzOdEFB~7}K^ytV>VJNi07UXe%fG4ON(}h69K4u6LfPLg^M8M>@jz?> zK4Y2W{nB}1gcitU>;HnQ|9y6T-M`Z_pz3UUx(fNaKQ4dCXK6#~8hYcR$ z<~;3y^J&tp3lB@@nn*^?vO{zFEg$FS?f-3w0q{N;#8(!RZ=hd+B$~i}sJlOo*N}%|e(96^${cpKA9{I@SP|&?o~K5Qogn(6k&*;^^YU_3Qo{ zCHf;551_%OtSUNSmPE?I+lr(*)_!Wl{~jU;U@?KQO-a3z4Nh))@I0w?-$)#-;;C{? zIM+~7W9g2$1&8eH{%BQV5P`4`V>?5d|EUIlv>>`GGtzyiUwO%!A(`X=moti{$;U-w$0uS9}_ zHqT;~DOTvo92eAI^gySYIP~zxA`{2*#P2$mi@I+lDe6iumq0-;6Z6n)^`+R~^yYuc z^1a08`&Wh#(AYxLOWg+cKVelL%fLoh60PE_&Ar}px`Iy2ckvE0Sw zH1(;;(fn1p$jt9@a?vD1YLM;c!v}N8*;UqdN0agHRt^J8I1qH8SND*iS?-S=4!#jfX^*cb2PW#= zq1YY_F7HZzdxOe(&Np|7o&l#lGqG*NQqX>E&1v;PgMI2$+E&I&fui1p^I2^nqtH?WejFtkp{weW6D+r^MKB4ZSY*_KpXB zC6%_txdU^%exWWallsxAOHA<+lf+Z5)A;%YfhIScI}-R|ga>1`8)ekGQWuF6n;q86 zf}xxX!3U7HJ5P(6hHcp7nWfaT9b2>8sKI)A$+01{eJXxcE&nzx_(G zj{?p?WdtEFNQ5o76>xemB*D6NSQ;g+?`1e=6gUsPM29;bp6S=xcM>i|xkg&|8rd_< zZuUKd3s_-Qmz#yMgCnT}!5DH&$GaAX5sBDL%iP#UYDZ^7^Cwkjj@Ql(a4c^yBslfw zCjQXJFvVih7CwYC)k6aEB=Zo@5BQq_`d@3_7MMF_V)fmK8WLKM5!7C}7_QNoO0a(& zdrf2J-U*|=p&RE?o135Dbe3K(zQ)RrYa4)hd_r#J_rUF}oPBYCydLbP#yWPRT>{TXa0bJX=j0^8B9XZd=GM($O zsvYkN95i6gve)E_OJ?mqi00^qf0N9?w% z10SOuqW@=P`nSor9tLb_tp&Ffi)R&W?(P(^9~5@kFYoB)c+sUlnz{_ja)DKxOm7{9 zU1EUbS*ZR8-z99TMDv5Tor`TTdmk9pDWVkq>rLjf?1NQeMVEU+Ln2>e5n|{b>yL0H{n!Sz!Horz9SBWUcG*a@ff_o(<^<27q zYy#T@)qWQj1qH=8)DG_7wme{!2Tl%KN$x(w(+&@qi?=59H%q#E-(Qk0pR-cfO>knl z^KY)j`eqB|y=WhD-X9#Umt+0*z-4V=4WX+V4uZ2Jhm(-Ww7EO0)q}*{*f?_TtXa7e z<#nOem*4b_ASyH{s;b(TBBgtor(|Pu#pVY$cDqlD>ohM94wvh-Y`Tb+qEA`sb9k4) zH`;fX8m+)%Ew`qu=RPD-42iQmas(k7I~Q7okyg}!|1bhXPPPg~&C2UN8~Wpv=$}_X zkS!uub;8NLxViGNhH)(sH^FtY@%E={)4Zxtc8i;-HWj-G&2&8ZFXwJ~S+TivrK(O7cU>N9&!(BWLrSmD4sqk0snNZra|LDO zK34H>T+ruF`bn za2v{+5Rm)gkw)AVr}nc>-G)v4In!+l~nS)|5`sflU3KTW)}8^3jTsGoL;rol>? 
zLAA3e&#`|ssok_JzA6qXyvQw@kzrWivQBE&DpY`2$8mo@x2Jqb@`*KAKziYbC`poh z$m~BuwQz*3Ks0#pKzWUh%VqVAGn5qc2jYyxFmXRDNHwM9d+ZXP26r`Y&joCFET7(S zmvL&})626rmO5hk$d|i)=CeHpIoQjWXE-wL)}7b*{SPagFP`j$yI#aVS@7Imw{gk9 z?@-J|^2ZI%L_no|V*`^yF1lcKhusxLw6T-bN3*%zpN3mIV{$z};L35f!cfiTvIop0 zdDW|#<~oalj)xtwL{sIFr0#n?m1rrJCykb^?z(QB=d-|fX+^==x+XcYj;gN3svI8( z3y_5fY?oP&Ylm`amGtdk`g+sDh*flr^6s}FTB(rS2D))Qqlg*$Uj{{_5v)t)8w2aN z*(1@;zO!ATkz2a1%178At((oRom#4q)s8Mt6Jhh7tR}5hzp4?R-PMGVVe{>@S#SE< zKkQ_{hWUS8>DT2cr$y0KxUOlrbHTnyo`~j8Z7*{)>sU=*w%#9`O%ft(QBb!mgotD1 z;0|^<%h1Z|LRmHE;8f({vpo}gU){;QPkt`g2^DT5rsIP)yW%dsXiT!{RF~g9-ZRR8 z^P;*tsuTFwLMNGZx&G$Cy7%QHGlVZik6Ih;uM7BTU(B=l+#!xlr#(dk;-XajaGtwo zi?D7VIhP9w8BsdJUK*=KJ;Qc%Ti{VGJa;%+-HJ0)2x@l#Y8{hPldeq&g1@CaTaoVVyq99 z%i`I?PeMVzN0RQi>}Vck8yQU|pu$ox;4)i5ZGo>czSZE@uBfjvwL9f7-+BA?9QCGa z#jMFr-P-VDcpP*Vj_S7i)_SVZ9{YUb7PXT-&)S#nh2+|>5o7#rGW9u{RdFjdZzqq;D=xUIc1Di0h6E6iI2Y@=xkKN|fy zv-yXZ;cFo9Q86_!*(goOo*BaBOnBE)1Gij#wgHQ{=S>Ya?4=VgRb0sBiScaie2yuu z!YnBUj=~pyxFa*3M5F0kx+=Q=dgY@i*W&2e%FYjnBv^OQzI{S zry=ZI>HgbE|G;;pOMOr{W;wOPv-G5H%* z6+hG0P6V;7ei)>Sgx(|h!rARrw)Nb1?!~vUxOOsQj@etDhint?508T7$magS%5$Aj zIoCY?O7spURKAE>$G_~Zi75Xi+pL_cU;|CCoQHd+aeW~rr*fEU;*>fr+k;8dp?pX4 zvo@M;G-unM4E1@_m9!NbECqgYkg^v?D;iRtHu!WHz`maxL=WdfbdeJOxHGD`G=K}$ zeEHf}94h<*J4#+bUV)w2T;$3t5G=8qG$XxuY%SycqJe^)- z(r~3e{yc$_Q@wGm{3LDpMf-E3FUlG$G5O5v1+bWjU1GNT*XOL(A=c_sx)Vv%ACP@%QE3tl`>Q zkL&f#${l*9;w^j^Kiw}1aGBp=gLF-Rx&X+~17(77J6P=8V_lpn$0uPl>I<=fSa>3z z9j~G3al(X6B#?fH{sTV8gn>v;7T0;Y~>?kIEy*Q7}ZxjE9 zuBo>#=4GyoeJMTB-gb97dvDtK<+%-Ht(f6P!c4btCpO;_tFviSdeXAs?ib39(pzQC z=bMrI>7z#V7x6=5UgXgHq5eaxT%%9wbc-BQj_x7*IaD9s@GHN~EO)SC8wr0&0m^dD zr`xHU-2`pMNOkV82U}I0j`G2kVy$(H>?-Ce1TA`r-1kWbIQxorn(vtG7gwQP3_i|X ziVHlbxpf)HAL+bNCre<=cd#T1iDemSbxMkDWojFS6yE1CTcLNWe90zhLPqXUfz zf4KbU^NnjKXXbG-rbjKB(V5~IPD*_9Uubd08YHGMCdD?c4H~oagb42I!0N(<2^ded zQoa>yevTPD*-*`6fRo~miOZkfWB$@3pPaWXtWx6NOi6jV4|hK^)m*=y^~|~I%}~aH zyV+F|+dbh$uu~j8e9S#DRs?*;5>x$?AS_ z$okYcP6@!KJUnnNQd)Q2g5+K1kmhD%k>$o`+#eAxF1x?UgP|*Uf0IPhBs=0}mt4{} z$U$k|GqF7PY2hBetbRsD{Bc?j5e~$Sra^19{!JWLP%xN9kf1d)aoC=(Z+;!yKT!)v zlQ=xE8xHpWQdF^O(5zO!*F=9x;n+fCS8H1S@c1}aGkv6}M1cDA_||+#@p1K;F)Ed- z>bi+5_KL-)gi7-jfwZcE>aZy_acXFL5 z)>vD}lVG?}ld(U;$t`TcJt4$tDO~i-w7F*k6hwzacgsW)-r~ zP*yhD-`{WKR5kWSLPn$U_{Se2A=h3L)x6d%9SmJDP}WXXb!pu$+y8vva6;Cp$1DwD8WSBdE5HwY1Y2gnv5=^@Z^B9?6JEY<^3vbrbK< z=_y$Or#1Rh#+B3>RqMOfdn=R84$7qj7(2}~?)$m;p`%Tw-Q&%bhQ!+EhU@lb-RVUB zWYrh;Nir^b!dwuil*)@CVlL-RIh{MX`{OV(LFX(h!7}@&>KTkYiRJVaFP3#K)FkV2 zSM@e!>){r;N%aXK=MJaY{3G$h)a&Pm4b4;eVO?&yt&WyuFNDV{O&m(i#|5nl<%ANK zH*S@bBnWq6s{3k-UaK5vQ>ezlCe`MiyXL%ld>9*dTGwx%cME%pZgFfBtiiWRdfQya z?eN~)9;u^TTwGXVMBf>si2hZgNXVo>aoYld<8i-QcnV)WEPu72RKD6|Vt|XWxX1q$ zdcHLeI~a_k-dM9*8jt%XJc=WHZYIU@trSxAihRvkk601+sX?vRG~_(9Jyt}>VHmIY zxCtWaIeo4dvd{PO$-2EWyzB#|o1W_U;kZkMt@ zhQekc*V`f;GJNPIhCy;H8UfXba;ab`eu-H=lgVH)Fv-^JW`BatKe8rZHWsG?a3N&E9~O z4VLV>6?fWa1&{B2uH+w8UWR|LKAgTh>gW3Ys;D!ux@KIwHh#P0ATI&+hE;xd*Lb!S zL`8QhA^t=^>$ReFiel)RKEwVcv@JqB(LH!~4C6#J`>ON7NR8hO*MX*qQ?vDDv%JHt z(iEH5`vW2HZ4=|M2yOvg`CPceHz~LzQn?Bp*Mz1 zQ@FTH_dZO~6YK7VPjYbyt?#dUH;7P=tX1(PIBib z4tL2L%);GAddAW#?E>^gH!ec>lbt7ELPsxiHVQANYYX*!q3B^c0C5pTF@CWkT0ZLN z?M$!T%@6O+n;LXir5-nXXS=>R>+g8laW-SlZ#XkjnDc1M&`QABEbE23%GdQEB;-y8 zVE2#Lc^~|{_7q?O?KzOD0s7|V`KLPwV#v%?;I}A9$Y2~aCNBx3Tt=9>^N@=EsBmtxfr()mr|LTr9q$Yi;hzg}%CLV&Z z)uFXAwY#fZ-en_=CI=yqxf0p|jfA+KX()Z9S)9{_s2yC?$|3e1#etgZwKQg_B`ja zYk35ueqW`4b399L4OadZk~@_-b;(+?{t^A~{3JHfqH9=qRQys+aB=$_%d*NKM%!(& zTBbI~S%PX)d4O+s&VM7}nY*udU6Wh6)r<5S6J0}QAqeD^4S>X&zuz z%_Pw`4{1nXLuJQ`*&LFo-OdJu&W^}WPhyV|?X+Su-@5L#mdGsaw3McVY23xNJKpae 
ziG{m-FS_g(Se4iV{g~BrTJr|@6j%s2En)mEm^Ih@Lk9KZRf=8OQ|U&bkcBSPiB;dS zoXVGM)+fb67n@d8V_Z8OIbe=Q;Z5>^PG5xp{r?JZ!&ud zpEYw@RN88c5*N7V!k5lpKvZ?7u44hoXxc}3vB>Lxj!@O2-*jQ?JQ|>K+o3#N*oA{S zK~1&TLDj9t!sAwhQ+$>^e0>uHAx^+ILHgZu^dZA)wc`@Bc~qwp>(>${UY#juj&isb zTihpxq}L3F`>9WDr#*BbD1joISXG`XPjoj;xqO8bES7y(BDGB!)xCU~=(_DX#Dagm z+j;5e$x^I_srfpvFDQ#e| zL`Wc|<@^2CKSvWF%uostnL<`Gn`{K3>dwuh?et5H_Qjj31&px4CfHKz@>GGr^~o^n zK=VSKDNG1UdB@FzQR3b8>_cP!>&0hR_XNMb3r^UQTz5$EQyvc)&r$92IXIy0>Oqz=Ogl{&_G@90~d&hYM5epJQeaOrxRU*38)<;%YjKuP+9B zy*IbV){Gu#WLIPbSANGmwVPtoA0|q zp(+FU2&{aVEU&K#7@rrYln?ld*ONr;is}#GTBbL$h&^`@npHI%8A1E60>2I3)`;Z= zL~ls5HA*}^PPxxEUV3VB+k*3J%^Z7vn~zo|X0{3HTu7=9@2RDWG}OWq>vRARq? zHGZf=BvBg-_%0YQ;i8fL^M6!&vQ@Ra zhAaw_i?l3T@9R1`wIkx54Zrd9COfD3QAG&Y5K6*nEP|1c8<}#?5>zJBo5;w>=9WfA zM${^7jE(>O+GqiV(v|re=aL4 zd#3!ro^x|wnVo4giVLD~8<*Q<5&mYyrOV z;SUWC`W91k3?JtUT3$levM)E4hn%7xH0A*7D+qH}%Q3;Fd0q4&Su&*rnc5&Edt%(oyhvfReh0{V1qkXUG37(XUVCo$kfHb;dMCa&(+Z0&;!zR(tiy*y= zMJTcejkzaYif*BlS>oF_aUp9kp3p#84*o=U&8zR z@?MpE_L~+U|5Rqe%A_mKREniYNw&MbwoR92`q1ncGyn15qgPK!JYeeXsm9pS0lVTz z@Jwy0&CkEOzSmtC0xZ3=s|RQjEg@6GR)na zyv$@JrC_T*%g+YT3toO4R7Q7CmucuCTMGu>-)Eii8JO@p+K{2QKmZHsdb4%HS$0Re zcJ&*IqTPk!Yl|DL&LKsS?JUvD(bf(qxG%9i{SLW{R##U`6^Dn1HI)gK17qf$zSwjr zUCCO?aU(2_%*7pAP$e@Jjq36>(fpy}#&{$TjDxU9@87oD)G9$00R5deKpwMMezAIi z@hE6qI>v0i^dy>FV`SBa`7L>a625dZ3yzsxywa zAKF&}S~}1@e~b*0?cX5^fDXWqQIJg!$)sv%k=APpB1QCrRU0pcK)^8C*J;)xiaj*2 z5k0(Loh~wZC~In(+uGW`YPKBAj0h&q*QeCY`Kg6OzxN-u7*`O`5ac3y++9+SYS2 zL}6je5>WfuqEYzr!)DT4@g->8oMp4z^-6dg^p4vAIr#;RM4HS{g8~~PjDBc9DjNF&Qa=J6$9zkp2Bm)xmx7>S_=Dx>Nbjv-{IoCoN3#T~DuN zA?tWB%MlXQFb*AfUp${gQpw5mH+&WyHc8B?wW2K18$+aSh^GlU0LTzZ=Tu)f!IwcA zZpc1-T9{!uv%x!vT6n4pBu0<)Z8K+=&wL4!%@4$9Xrj+{pSCFIlr+Z^V;={o%^tgn zSIr~Rd+^MU4K1*m!)bS`Sj#fgu&}RWqoAL1++ZtM>#xg?mtw);pxvku1XMXs9im?K ze1n+SH2zPL<5O5w-yOB@(Y8tIIHyi7XZ;rJdA=q9wkkw5g7ars)=2~QRHQvN4sE$e zLq>8Bm$q(v6ISBAo(q_NIBaUK8wpiM0B$cDSB7m6;D~KCUA7x-a5S$-Q1IJjA?T-h z5vv!~J8efurb>R-Fo_(7A3fp~*GCBvzBp1dr;k_I7(ys-U0g*_*ACEN_3i{g111fF zV9pq&$IK~&CU*%DIJ08 zb~>fhir>fyxyrhap$6noghj#AlO~r)^}UId+cFLUs*pYu8>KjSg5oZwocL~VryE5p zk|uX)?Ca1O9EPE;rw3rVKB+4!JC@W)H!~!TuiESYEP9h{Ql_v!#p|a+a_W)IDh^-l z;Zu1<_eKy~%1?_L?@SfU_8mYrJXds|qMV=+qHTSXq96d1NKa#4jDS4-N)fA6rA`Hr zMbOrDZ>$($57M^Ofn|7b5R^YZQ|r9}05RJ+v03}1jOia>&P8!SJ4KOp8)J=>eB&O* zRZG#K^C~-IyR)qvzdMZ2xroSTt#kCRExBhtBBUStG=%lnl4lzei_k<`14-4z=Lh56 zRX{0QmM~85F0?$=odG@Ub3wh8aIpb>_V57m4(O^Gz<&%Vhx5L9B*%o-8>Y0;(T8BN zjX)`v=0sO2m0_oJa<22e+iwmh?YVg~GO>zG`{`3!eYD#pov1P(uraq zh!6`Pl&+SCZL%O$K1L>ry>@&Lv}`N_Wk14gmUjwdgf<61s=GXp8vuRZV;;5w131u8 zz}WGf$#KIMZ;AQ$sO`NRP=@1y#gV^W6!AqbT%*wK^+c$_)o=}@weP*Q2LmhBOm@%8 zl)SG_yPaldActkzl_DaT106=t97c`Dd~c-}ky9Vh6|8dmLFGpA_&O*C3F#D9mLdo^ z0?^8e(-q*fw=_gnjV?eNV7M};I%sZI$tabrQM7X~Zj$I~0WA9~rsv?;1(mvxP(>~2 zsftEIKhMcZip-$bSsthoi+-$@NtfRJA!kxm*VU-{^1^xjB;#gRl0TIO$Dl{DmSd6Z zIH-l0=c=6bBd_36J)v*`MG0xec243f04O<>ZzKhp2`^dzAU&tACP_4U=^@NL9JmC)pe z0llX4xHX{*ixzZCe>JtM(QV|~{hi;kD~@-{J|>~_gg@1MIHwqJ&mz`i&I7?hl)9M9 zL;8IsvLmZap!_9xBxGKEut$5)=0zQVq*B{jiO;yzpDve34?!JPGr%I}vWS9wTa28t z!w|VEkQ}LBn?$4tN-c{)jo}wZB6%;9bKu7-4s=t*gC(60Kt)bDD5#W~a?7+!HebN@ z^{mSr|KfVz4vp~WdwQa@ei)*IkSXT9H|g(D>eoYsFLFUG+lYBe^k&aHH)XB4Z(%tT zy5sPoVV#4MHCXdToXlx60QyaY2p)`#Y!ghJA68#Nz?3=|<1_bGD!DRd`NGCf5`dF) zR9DX3NXf*{kW}Ce)&lXEVOV#O3uptHR(i0*6ClYJwI5n{(>%K< z5|Y#KmS3zKf8nyq zEPk@q-yw!*I7!c9QUc5jeQnZR6YKE|OJH3OYOdlay|KMtWE{(`??pT_U}KyT%#{yn z`tDojJgd~Rs$Cs_d4Ve^uUWzI!t$4E{4`gx zycC*c2VlCwY=!eUl_LG`b`cHKXAPBFmq49c(5dS{u^A>Y>%(iBdtPdT)&$SZM)OmF zRc|_#@WkNOQgVoxh*S7~oPBvb)a%PlQsKJ)UqU6q*oJH&`>yPS-#wjEo%1~BIp6Q=_fN0MET4P1?(4pm_w~Mb-fkM-h)ykz 
zfrmS;j0}?L<+`>}D{?+-O@(_hq4LyFOqkZbYpmS}L+chs=dc6q2|#S<#(hZwL#Upj zCY^-*e|8f^Ct$x)X+HkmMSrq@Q zDF%IR9hANfXc%}nt};QWU}x}+b+2jLr#khN%3;UGg{GgYFM;mrcPybIf&KL4aB!d3 z>F2!eYr#q!S%E@jU%E6kTsf21_ih+LpL0xow!_21vL1zw`gU+bf5v-gpKic7=ReoE zm0~$GX+GfU;`4T&e|*#jbpfWr$H&5At{cULdNo6!-R?|)F}{mQ*i+OTBmQ3=NK~zO1R7!)s~NM@~QLel8KoD{|QZ% znVRwu--2t7eQL=atZ^RQw;DhZXa#K3XAy3|pdAElcFqoKB>N2*c!~AY51E1T!g4b1 z$~R=XxkPuXFBv31b(lSM-wk7mOi{L0IB<&JYoEUlhRMXVt!6Ng`f#jX^(6f;zJ1wi zJ*UBU`wVo$9y{O)9VR@%5a}QWji5Nk|A7q6UH-}VS^pvU-td!CmzF?P;a9ji7p70` zv%Q8?Jl=N(HD{RHEy2~#B9=9r)u&}fTjt3t4iXDr^*?Lv8Y8gC6ttgF>RrE$ZKM$R z^rp|(2B_-d%s73K=w{dbjD2_YaSL$HAF8pmCwil&=Br*eK{-rr2rlOcBw9Q~uU-vE zp|$Gu$iRBTqbbhtG8=tT#8!dpSkUPZm^v8w`ycaJXP0!*+ZN@-d!FuJg0Ivz2p+B; z9b!cjR#YK2w59`&3eF$YlAR&Ko6c>Z{&N9yvVTmX%5{sK8U3b)D8Pbtc`VP!>tkko zLy|`egMHYxpmg5@9rV40hXM!7>8lHP|7hc&09)tl?7B7ouu3Ti&TFwOaj+511-wc9 zGur7|{YmrHpD6zpZ@J9^PIU}b!|%xA697%%;6rs=pE> zKs`7s?*3TYdTfITVZ@Djwg)WSIuF#m>`~73$qN^ly*xT-%rBZCImvgiDo>Hzt6ugGLkBR{*xWm)d3&A~5h{@e> zQR9{N3DgY-1ZKTtL~@KGM~(w}ab@UaUxB@)Dr>z_`#Jrk!O{nP>TW)l1{YsuIytP^ zk6No{GY7BCJFL7crYb_e&(wK`D6lO8mD-i7&x>bgz%{)*I&5?)4qE}7XO|1261p#- zdQQNbKU?t@Sv>oeMny^ck;?WuX#ZGi@ni+zrsC1CC$l{QR8*qHkoi&l^Hp3K-Qp6y z*7${Vb!%W#HZYO3+pv*M8&0&j8_qJBGT;Gl=k(C*)q93``UMz9zFO z)VPX1!Q|eR$F>7(olhX9XbM*LPIR;LVt)sjWtCUy{Hm`K$GPe&cZ&^u=C2iCJxeqi zT6Iya3Iz?D9uvg;j!F#D4l--jXKZ zFVAvAY-EVE0_rZ?Kwv1#A#iRkDN7Ru0YC0NK*-4ryDk>g72hdAOF-rCtI^7F*=!gO zQ&1cLJpkf&Zz{KfTUiU^!tcEglOL>Cx)4iB!}@sEVb~t>v?vnWeTylvM(z5sF4=U87M8J3yBoZYv_4FOu1MdOzCX)Ri`!CVNz0i8X5_U1 zW~8+YSM>&8WtU8h*@mChEU_cK;s(UD0>MxJv>EI~l#NW*;xCb8YA z=+DkCJ+m!5K+~WkO=}gbDW<<~J1sD27#g}e34%Xc(mI1D1F}HXJI#FkKvh${!DeEr zcU^-xqzJdCDdY8Ves@X}PS!aJ7Q*a0`{}T<&#gL6=b}d?_c+M$)g-rRHy!E?nWd!O7Ar{P_NSeW1a4Hw zFsR5rAKKTX(8wo&%(Y)^@I$4Kc)#+ZF0coM>KnewHmI6$S(vv7*sXgKZ5h|o;av0Q z{4q#W*SJ5;o!jRaP@+62Ui)Oy_~+6$(5^I&mFoi|8@k>0oC}EJX(0Q4GUITKcQE-=oB&oG)R)Mw@ zd7W2i1RHtuy?;y|?R#2dRAK*?H7yz)0oB{Rgd0;;?azw!h6)qs*~txJ5A?gn!hk-s zwHh$oQKm(1-aSdLnvB}ja(nJrLh(z3;r)$BGN8q(5L0JiPCV+ zmzKl*)m287#tY73BH8qvYbu3wKH%t^^=JedEg~IBQ0LlA%<61D$l?$^xk+D=2a3LD z#Far&RWW=c77x(ksi#As%a`4Di=fm{j6X%4Y#ng)tU@-9 z!rY@h7KErwy8V)mLMr2 z=Uh1bcjO19g7P$Sh$d?E{Xu?4uLy$hGOCfZ+*3K*H*LlR(7Qaoxq={3+SUQl!@3Y} zXksO;d*3-;tpaXMtYE;Y&3rp-_F97YJt%6onbL?|awbsM0Xy$%mc%IP#1F~vUB_L8 zb@m*bJ1wnhs1f7dcav|a<_>mcTo>6f_u^heaEEdX8Lhx(5PD#~2EKGHe>S$BgGP)V z2wV8Km$%Ep;hpToX9PCfaofveJhfdJA<^|u{g$D#?Jw`uxozJ$7lGt;1>IA=M&z!Z z-WHz0Hd+(T5fdM*B{pGm53_n*)G%f!H)KHJ_te!q!%sYV2BW>uQ`NM44h#oenX~!j zssZxbo{p=Q9xGD|loAe0#MI3*IStqR_5-b>7ctqE@3wx+9(W}w0CZPL_dSDOYR^Xq z$Y`dU3`m)cVB7#X(Q;0DL`Wj9h1R9fvv89PxJo0luQWZ>yUH5k*P?Nqt}bPOOnP~- zB24wkdy4qEfawpN~WuCKfWbeoY$(~#%%WrN08otT* z0?s)U1);*AEBOnOHn$bA$$HMCxM3wJwhG2<`V!kK=}>3a)*{|maW^P#t|}XT*U0+( zCb>qJQEp~z2&2M!l+0SG8}Kw+0t%W&4vx(wIW$YT#sa@D?-9zntj_nY5m#pux3Bmu zaF4nzX+o8A9rN}RZaWY84 z%K3Anc#l&~w!!sF`cv_9HO>y471pnaLecJ--Ws-Z!kJwe+PJ+~BMlpjLWMMn_j0h8pHO1%k_CU6{|qxC%>S6QQ}D_qsN6iuAYg!|+Fl1H^B^!&PHDhrZvpY z|AqJ&bwD6Y1hK;`G!o}Kf2Jx%XT!M1`ofv%$BpB`hk6B?e$8S}eNij%l)_U~MGIlW zH&qCu49(b8A2&%POCN8fia?LBm1T8#>oepXB>P(1VK=)z+M=k`*tok97?!#P2x=Rlv*EA@Sk1!dUJEgfVmp}jji2PpGfMpVvo?u`ouhpVGbTB?3XGIdMSsO)_Ww#;oY!~ZYweVu7Ci=}6iJB1$KG)csZ_0fZj@3N0IjTF?^7+Dv zV9>Iv3(0``=#7Pi#h0uiZ1&LFu8b6G1!JE0m<3mMN?d0FZxE3X^pI_1zC5q*^~fYE zADQK?Y`8KMee)34~+r|rS4J-woNqMt6TGI&*Lq=%d*qMB) z6!ckuyY=ujG91;Z{=9N~DkriXBnn}}BNPh@&W+6BD=wq^)icoe4Cv6p%L0Tx@AvHJ zn7z7l)1KtRH@)oV9#RNCFGz`z9=GZ0vJRLHd>)0dMULN@_$VtgfGAM)BNWl&3FzCb z@I`YIH|rau|fPuRx}2(2bCR;br-ty^jBG3o?c-;Cf%Vq7O}7LjOni3Xy2( z6KE)qxWyc0ZQILk7Z&PCwzus$N@Owi9ES5Ol}$AB5r&X6%SDqrm-4Hhy1CbVss)rk 
zbp%g_-*bIJ&-SWFY{K9Aqw3YBDKI`romLPU{RF6jj4bLhs&c;t=$oDJBX}8~S)xiJ zjl&|@+3p5#7u=P~&p~;OtS?M2pLbh2wg1{+f(`fGw*iZrm5K55jgd+#mvILMX|0A` zqpFV1BI;Hzg6-B3BLno75p`*NydgH4cXFFRq@F< zw6Y_uoDWH$5M%N^)f?BBtA3AXmi7*EypaitW#f-sA;~MwxHsnx5|O#v3_AB?P_-$~ zrA2It6^7g7x8vV~yoD) zP`=nWsxxjWNpoBY^@YFeA<$@^S>ram$ZV)2$f{8k*yW4|2ku)rqoOF;@W)1Ks^jvzCi{Y<;o^=MfhuY+hdh7|O9Ps^JXEQ#n_bNA~5(O+}m zDz?WB)>z{!eHmR{&M`oZkLSJ3sGEa{0@tGnhWDMTGxe=FlqNM)6|XaGCxw0E8x}Jy z6lb!@jA!GLqYY8-QdlI3FeK?JD9qT`rPWN+@Km@zcXSByLfcJbhqleksu46A%1Wtf z6_^ya{NfNBn-A^QkK)XAl${e*`)NmRrVbirUaOD}ZDN9&1q8@CBorIUuk8RAPru0n zlc{v7N*Nd{K49owY(I*;tLdj>!&U$8K&_6MK3iBs38pDOm#>hmM$zc5XG9PgyL_ z=SN%rHLptGqf0u)={tJwTloEZY8@UA-21ZUr&T9?;wudn+buW3@3gZUMbFMTy#6 z=*bM-PHr9A$U(Q%ZGW~@CO)5!lb)D;AWdWNSXQr zd-Ouxj`kZg9WOLk#Gn_<5)Tnw9D`)5N=0$|gHVkLyHysC2vnmyE^WhQvjx~tmVg1y zWdzn2qJBC!5UxBg)Y!&1q=9WT46W=;6m1z5wPA^6Tht)C3aR2_gfiy@X!54aYm`DQ zCufsYlE=UqE|r(d9d}2*R>C$GrxJ9tXD3i^b5w|#qcQWY37&iNxdMtiZXMPj8TQD! z*>2OlSAI0)i4lq>{my6W&>RgvrZnt3knS`&= za`F1pGgq5nt{XsE5$C;yY=522c@UFC0g^f;+m@JVq+=7ZfH%58V(D&D{(-U9n+}Tn ztZL#aEw}RRi~D~ED7^$I`Zi?5!S7s6JOB>EM_&P0r%_QN5gA%?O5>91Fbl?58|0l! zlOl(kSfUB~K}v%`vq~YlCsZ60Q8kwqT8mICdB)a0QSY5}!==Q!q?I)_s7LV#I5fQT zF0cf(fI!0+?6fKQeiKh+?Yl${ii62^615E#Kc^8`n^{y==ZZ4H%K)wWMus4j*KRlk<4}$ zk(`a3pmiR?&o}4LmIF8cLc~d=nF5g$o+7ObNZ@jO!@b;5{5=moSaZy2)T`6B7Q@A= zw{arv*5)}~%4@s1A!aroIewd0tQ&Al9F)&V@ih|*0N4SdQ3-;wfB&3(16T#vnr7g8I?f-vpm!!48s zvESz>iC)2M7S3+Oy=myB84K*QS|1Qkqq@ zoCkSwJwS{3nkgHqif&4Tm+dxF)bB|+58DtQ^D;_S^`PUhEv&jsW;*YZHJg6eZ+k0k zD-;|j$L)t#x0438YwLTd!*%n;aG2@s---$EP}UzOF`QP6oy3D3f4oD>hBv5qs_EZY z0HErg0m`6^%+=KafE%j?Y&>6<4On(USY2HT5S<`zw^+BKj*CzVYT~wdNXNkBxnrcw z!#7&|M4F_i_FD+up6Z;mRP>c(_nxvubeP!+l#0D91$~oCr<^>oe%J<{U+_Zu92!1Z zD0Kqq-AYlEYaJaye_d;em<=X92m;md?E?j}uza zZ-M%3bOo9eGOS@?0I5S7wNn`7+C??7xOOj{3>b_M`tn5SVP5{WI)^}=i5HL{>8F{7 zA#fYeQ5i~#rR`TxT2AogiYhDB`Fa!8f^5Bg3CJGfmHhA|;J$iRF3*)Y1w5k0__%>A z_&uJQQK+KG004cgU~1c^Z4EN6ShQwgjfx@3IFp|m1JkV$>P@^xoh4M zg&Q?;qaLYY${YgeS5(&|2?Z$v=#UfuV4}aCWPJ{uk`ZE$*3NWewaVeLEDkXFREmlz zLV74mwT^d*r4Yn-(lD%uGOwdP@xwCou z99DBRlDH!PygRkMSj_z5=s+DLu~?mq_WdD+ltSYKtE=8h(ps+l)ybuKMfF3lE#>RT za3vI*UknN6Y0@r82inXzmjEliie%q?p$4DE2usvJAsol9oZE{a?^H(-ZwfmG?K8si zFU}GBJKiWTkl9Y15wRZPe;AI|%H3T&+gtiTWf25}r0pRcsCR9Qjk5}C&U?cwTBja? zEZvhlhne$N=qyznJ$8R57VttE$>UQCDgL7hjOIw<@;Zh&BJ@w`1@5Oikqv<&*Zqxe zzN+6rv?PA^oEX5{yta^EA>S~r1gXcYHZ~Bq;zb#EDdq!RIkFF6!NQ+Ez6SU-H=edp zx(`OE4%i-^#{3eGM$DCsFGEa&wJ^#gq_u-@%*!$jl@2ry-CcD#bd5$*oOL;<&`W}qR5xOW?Ic2>s&iH84=S|ZwskNy8ouz%dXVTP zu``_0GX~=Cas4B+or(08p$n@oY|aQS`2%6Dq8;Zvt$WJ|cOFwvFX$K^g&9$H zPzoreJxv*5!lkxZtHH~@Bs>{Zv7v-Sv@*78YonV|lhO}wqoz9!_GR`1JZwhy^^7op zDEE_SIK310@|$xr=QIvD^5se!ikdRe5z|Kt zq{f%TjlOE8xU@9l9J$FGV<>QNXxT(DN(tz096^9=b0KeW6^z@0gra5NR;qJ#g1r%4!ti*dS+$= zJ;8Q)oU&o`nZ7RCLN?771#3pq3Z^k!8WaGKv>H4d^0k7V#k-8tx}G|9#U9hyac@0e zNqW(vOw~}CvdsmRQyQwhUpIV3&-AE&zav&wau-c{QF8L{X@%N#XLpr;hvR=qXF_dz>G&xSGp) z+B5}fjqb~Ws-fS)7|?~;w)!H?)`AN`dOgB{H3jft_KZ^u+Mz`J(6&I#raId#>z40S z`TOMRn`Z;`ufs}k+J2W*6$g-uws}~y{o)0a+=$6T@i?qtMcRwbO^|5KM9qIpPTn6p zsR5d9jHbt00ubXXP0~l9ph8(qm8bm{Y94mlDG21H_aGIIbnS3!mht*fe;UJIl z?;fzFQScl}q z6dbxqW^?UXqp5CY)Gqq|<%@{AgEDZmPjR>qoAFCgA^!rungdD?)&A*bhlJfut z>25xfk?V~T8Hgm%Sl8#M`>uW;g1OMs5uu%$b@qhf{4fm zPj27WEtzD8rD=$uBWW3eeR+oTu}qLGYLsgAWQ1s~p}g(fyCQ8ibY>d5uX2JeAOPvX zA=Kxp%Hw}ViItOAstIA|u->7{4DE0BZrZJ{LN zU|sJE-2*VWtX8oCw;oEe9Ox7H%gTZ)qmv8M75YngxLEz-#W6puugN+hRB`Y`P+^Rh z*hY!|wu$X!Xbb!Elr3)y%$fUaZ&I)rxCCUlV_qBnArpShW}lZ)b6dVoBYo&|fkx)! 
z;!z+5M1O9*G@TQ$(8;zn>Pzj4Z;G)VDhnQ#!{!3ajl|Y9)Qsxd9qSEWl$&osb6QgG z7r-A?8)G(( za-*v@_eD`Bde&J65sreRM~&V&;t%%p$;*v7x7@CO(%_DH&^LAj0)G1U5fv6OH4Rp< z=fj|@O|93)Nbh#3n@ZkZ6xtA?m)`kqtdoLgagW^}M&7kIF=*1_#G#u7eYbBdeQ<9< zzfUYJBnR{^3!`&|o)lZ;*Yxto4B^(Z6yvz(sV{rjK$<&(Pc8+1#3fz-6xBV-GmLqF zk^Hlg-H=%|HKU!!mt?tHZlQr3)y=iMLH;6Q15z9LL$bvS`EbPqHiv=1E zjSJ=?rrhF8DmCAD&RV*am&n+tRB0mgMwA)b z_pG?Dg;vFaAyHx~&CvCbp#CrVWvap3N_z`2%*Sxj1cnqC|!9c7_1M=90} z^AGLOzTRyNn^VJ-jw8mA*m$d(g-H)>Qq=6#B6}$E*E|hwsbiM6Jcy31#ndx zR;rGiV*4AMTgNJ(RUD;vk^_HY0ooS=<)ax@T6)G%d!j5$u4uh9^x?cqGF)jYxrM@N zQ5b+^u>QSW*Lo=KvbYtZLE!M&A>y4HDvs==#|aMp7p)IIN<RBD_o@#RUjR`}H(Fqp7X!1W zZwmbCDDcR{n!I)dUBJG>@3H4l6ctw30YaLmx6`zIl0j_2Ek+F~cZE|H1h^z#pc9T8 z@YbM8(nemp5~AZmR}dI(%xW-S-yP-UjCwr05ckwq2s$wrkkI61#FemNr-PHj1OK1% zY$4g@cr5*lNb1-$-I8Q!i}@c`@ZCwnu)R$q2sq*!7`=mYL+UBX@JwPnZ0A^VhFIhZ zD$^CDMFI;G^}81Dd)el2jU))$D@`!eeaD3w))i}5UaND*9l`r($aO1GhH1JLO68=1 zEqsGxV=Vsr2BzE@_c5jHr!?#?4uhk7PivkWN|5bf_QmDNeBSIdaE%_XxtnDbUG(M zw-1{s{t!yp4g#+lx{u_F(sah^J!n4r?QvQZLS0#1oy^WR3K&~l%fVW#)oZXl03ZV; z%Z`=zHf*g&b#OU$l4n+)hvlv-?i<~=KCj=zvIWSX207@kf`V++R@3pVO@7u(BU`nH zUyO17ozy2hsL(Y1mQV~~%%8{MYx&b4Uf z;*-RMp@sS`dmdGtHzxZA02TUqwwK8@0{*-JI}GX>l6l>;XEEp(^VL~Uv4w}{Jm_k$ zOD5|*VrjDxhc{I-J50D-HyKbu*W0?bBM#3y1RZ*k<&osNd5t;_gxi_*ZVv&3`P>0T zcEc<(Q~fBhqtyBe4Mq4l+Z`Tebp z7b3l(7GB7vx~w)nSEGD1x$5x4-20;-67~fmg^alw!_dR%YMu+HbUn3j5~2X(CCTH< zjEe$H_M2$I9JmZG!nRB5#JjqU{^||8RRD`B#W&^=)lYMB44x_SpfxeKvf3Dmork2- zf3w|=Q7l=IjBf!!ktGy`A`Y!cNDY}Tv0k3Us_MJuzAW@>Xdu#sZtTd=I&3rvj>%l0 zs9QqCWV21=6z{Ulr7vja6_jiD%m<=gLswGT7@!6KeWh2Q_sckq3o6dfB4 z4N^)Fx+>72lss%Sa4~;5t2{bSya<|9DFyWiL~$v2LJ5giPX(~Sia(n8i;Zo3*Vs`? zWgRVrI+CP-9|HB@UBUIbV#^*7G`w;H(Rx2eCRf zUssaVsx+ffpwU!yz;a%LghO~A@h8giERmr4n$2yMv{dHQ!qL@+qZRA42R`C8dt>gD zVjLR!F4MQON%Kz9ji~yt2INyi?$O4ptzBQL*GSm}$OY2G&_mYVQWaYG=vMgNzK$22 zDIcs!bME?Gq0bUF<(60z&!yims2{mAPf&4d$qB&N~x%${JSzfz#npdXRpbCW;n&Bb0c_i$?X;Yb#$GbW+-yM%tSoeJvXR zi`PFRg)@R#$)7%{IwxY4IFVOebHW78L7(+(l473xkKVvXwKN+xTbVZgmfxzhoB~wnNibvI|{>tB2i4H z2Ul=((m`-w5(hMC+HUSzLD$2qf(xaIp`8T+kM?fN*=KIf+fR!(BTuPb7;_I-tJGY_ zY$01vhIcM-9#v%ni0L!vbF{0zfY)|L{H;{aqT*P%BC*aXctXyR3JtpaG)p9Us|Bjf za>4vPRP*i}v!swn7PWnzWxqMZ)T{P|U1b1OwCG0KK+&lfK7iG>+Mi*y_RcW94#; zzur$FI=&F7@_J|j+fqGMduNjnE3Jy0V~jGbdk})bK*=t3mmuXqYcU zW29Pni^>Bp2pl$rSmZmq$b_LpLda1B-5>8^?A3SVc)Zg!$f2E5JX5r=?$>>4?3z9? z4*hpIhZi!d!Zuw{k*tX!H=rx?chP9xJqFH-5xaLK`3W5|xXivJ?%~waE*4_vFdjyI z>!V;aS>@i)`dsC^ef|2XPm8eW5qY)><(E*ex~!fWDD882XBx&f#lNyD2_BH<8cYAQ zigPcLNSs`(?{{top@9_-N)sJ1u0(q}3muFkxUy90W?cb#by_HpyKdFF4Uk)o5*7uq!ead+;*G~YCKaJ{Zb-W(eR9NlM>4X*A z{sd|k@6mv`_I?3yDl$t6OhY6IkDIo?m_Yfe(f7#8;7m8jhe6$h6@Yt{fd*Z<1lKs) z*-%M#FMo6G!+H3(*G4I$bjt?zzJEzmLYw|9q<`_ z9?LciBjO9|p+XQWc06o1ty8^mfwNu|3d8&TYA_&+}0*OKdS9D(D3@F0U%! 
zn(v$#Vei$hbDquVNNS>UjmX2DZ5;ekr9G1O&Yr97KC8tOqb=xFK)_V11#X2O0@D)cr) z@O~-FDj+$yM~6NKhzb^$a=S64ak7hzR)qrlv8CQa>0$ua{Fzz@V{yNze$PUfCl2%5 zK>2&HtAjhTmq0^LFA>-OzJd9IH*RU*1?jJkxX_tB&g@3-KQ*^YdjEbWM9U?a~&b>2tez2C=M%!>S7C^dw9FQ3rbVxFd-(tp7=MB$6Hz|%-MvT7$zir(|FFpQ0KzK z<>&@lTa(|5WJ;GqFaFLVNMg+7Jct+{^kj(n7}Dk=X>)qSA4fPbgH~_1PpaDySH|zk)Znnlie!Dgiq#ju+G*|&E*#8{Jj}bnP0vG|c zYVCH~pE`w2&8L{)^o5rf>$FZ~ZO-VH3O4LoHXD-}Ju;B1^tMtfCj$M{@;4U;cx*f@ zt_rz)GqLQ=3I`-7*Gxa*J4X)B0MISIP`1HJS?kO{Kb09lP9#iWXfCx3g0m%sfGZ5v zto`!XuMbNm9UpS6FKc~9+BQX6BN?=*b85li9d*+=ezQniUmxtYcP}%-oe0oBJf%2) zqq6mFzrLF_y_jp+Jm41n%3)&O{B^QLXM3ep$M^J@!pl>?e)#XJfY2rqrZmijq-vic zzIE_KTmzSB&YM@3^MdQw7bGv@SHtLKPlJJPOmW#c$ZLKLo%i{hd|m|)hsw;APlNLG z6Ftumwkpk(VRJjxao>}(?K#Wk0B7xIuOp9P&ff)fHWUg)pt3@iG+T)--3%*M z`~W9H@pZ($V_CX2n*kd+bv1&-a-TWkkV5 zTH)d0a-t7d@ni+hOWuGIR5O`C@~fz~3Ig2qJI12l*P0bwa@&7j`9DK1f9L@=QlEqq z#bpal%kky(85BnHBa}+iKgH{|4s-rN3Y;D{Fp%xR`GaIrMH?Syn*K>>!|rt*SA z(Di<;<|OxT@|J@o_sfoz(Egw6^`6Lu)G(JwqZsqV^cDffq~+U)pHeQ_4ejzQHshh7 zkpY+FsmB2MWwkcmxCL@%?+YG-)=&Mp>L7h-`10o$el5WtsYl6m=oE{$9DUPFT~5NU znX)~XOe2Uz8{|Sy0~#>E8*_vw>Ub3n=oj*yU=H^dhe0ixIUY=KsM3D+Z))-90^kD( zH+*W37dwm>3;2f@o%H$j>v$@hrXflT7qI^K%M@bq%vJxV9s$`>V|?bxOy-02FSsM9 zM_oSgaeuEwvMk&$j6Mxh0L%!kaq}T$no=&DM$!4s^QNg_I#n_h2XzFoq))k zbc)M6^R7SULOxS>3%a^Y(K5>Fx1vGl5+NJlg+Ph_eC$6Bh46U13f{D-t`-3T5U!(# z8gSC^vP_=Qx%%J_h8nzZebDw^(7a1y_qW>azl37`5I`TkQy(k|6mQ~ixGX3CMk(5S zzA87ZpT`ZEL4YfQe=8|9`#Iyki|wC3=X0FMh0>7SJ(Ehq76A#}$1^+qc#BQXM;2Ff zj-Fo8S+$+v=U@NVAbz~zLk{_N{!LG9j&IBa&Ped*15-X6&+$Z=(D~2*%eI0|=dY2J zBGN)OT5K)0|NJh2PdD0`yd}c5M7$21 z4_7w5y@P(jb58v?8uFjkhV6-@)G;li0uqdZv=*eQKjHY#mHcpJVAw3 zK<%aflt~5Z+n=0@Ek)@ahGLNQ=!{oBm|y6Ie;n3jTC;M3SW}dq-ZJJgIlX^zrTwn} zV-p#FZI2nIIr|57^A*jS#=WbKrlCcuJHd}`{*siREBg2K4?(8fJ*LGcf4Zq>mDHGa zKC!v%Pa#TKI*xwINOP|C+aarH>X5~qnv$jdcHe&&F{%$5DTBwfv?WWN{11spjqkhV z4*qfyN$76(*>sInd-lz1U?#t={a?n~&jpZtBB^@3n7`-`Euif3jKnzevhlH>Gh)v4 z)}|0pI(iF?qGa1);09SCl*3WhH3n^yuL=KkdkBbP;0S&~PCf6O3!K(@biSiCWu@_i z!x{^id!xS|uT2EiGw(Z@994`<@scc>Htb+7eK&A>k>M};>?b7U=a+scXUR-b7>eLy z^U*YIUS5Hsc6ejmZU5Ks(*LUwR-Kb3tllg7`lFIErMJ*-aIIPm*#bL1W`k|of zC%Bp98aiQxr2pTqB!rny8l|BNbAyMuqhS9qKfqC}y)!;z#rq<2jHVO5vD~r?2H%-B;`>vmNJ2f9Zpb z4h*O05x{~)@!_>%}Vw&7X*L1T6ST*-!eD4`q|jl!whS3YDYsOR47o0J>n#_j&4j`r`P`*j{*cR-27M7}T$kqaGLAg|Wd_6sC> zRz6~0Ht%MhZ@-GBd7dFz-z2F@q!|3BpaP{t5TlvP;DeI8 zWK{5vhv#Uh+CE2K`VXrHej?yB2#T0J`h$$`_fP)YtNWJ#(9DXPh3v0P75{#Qe53(^ zGQ3P^_TO*3U!sIx-V!{WpFmtxz<-{^&jpf9#q*D9C#?S`CHi}NBnR=(%vXx7(*G>+ z|0EwNT_BqU9r0mO#* zGPAZBwGt{|2xd#l$oS1C$H&LJCk5X74~EMhzYECG!D_rFd+z`!H`jUj0l8Z4G1!T5od;s(0-kxh78jU_fc^c1S`JWGIF!=r>XZ+_P2nacCNdKG7pn8I@ z8$!eIojl%wgNlBk*dhH>rT;oy|8NL_7et`Z@6Q0ThNhqc4IlN}tKO1u`@-ZXOhFII&F77e!CxcXc>1W;*8<7H2m_)jv~ ze_vKfdA#Nd{`mGXcw{7ABjt8f{RfSt_L+j{Cl=s`M6y|fVP|?AyZ}heDJj4;JOrqe zsn1u{*MA0k`_0z$4l4<2pHHcpL~ z_hu<8RxPZI*0fmD+$eO52v|Q1v#$9R5}I*Cfc~F1G8Cpw%|xUQv72?R6LOjHLt7ch*#{R=9#3mIy_i}B>wN8iNUJ@z9P{)paw1z zopDGHx&_=`|{ASwpj z?h#;M(H}PJQ?b}v;lF)xW(l7-eW~mv%z*~DB z2Ba^l3aJFcpC)E35ePGeIuJuLksfW7N^(Q&9pN@EYa+c8s>C>WI@`4KD}3-0u&X=( zR3s5xAQe;&=EKc#(aE~`I>O&PmLr@b4JKE%epmv@RRPA%;) zko*tb;iCrR3#FhLT6NJ|QVOgwt^dpDA8QP&k}TQs>o`>lom4peJ`Jj|K4M6AnzI-% zd<^E_Ev3Gbg38knaNRJ&W4PiUL?!%;4%=N^!+IU|&VXp9Xg^3JJOpI`ns*{Pr2gj- zWxI|)f==&V@PH$TpZ(`x;UA8gBq@om;~~bWnXx&(9a%uUsq#Af9=Ak$R>Qurm9`%- zMG3gtjbEAe?6$wWHhVNEN@*(}QesV~iimo0n;IcSMHR_hqtJOVoAo4`i zm*s~bge@*yOUGH>Z>&;xe23*Q3?`q@wIQSS4&ik}%E6cJ&rD7OAN@ ziqadswz;tm+C$#BstdqAt)?@evV``uf&15_g=0+>bz4Juh*zhKUZQiT;ADUHbF7Yd zgRhgij9CNOtuuHw#zM6}w)gm;jll(>4I$ivDT+Bx?lRf;@m+0tvg73BNZMLR`v#PlQH6R^@YpW6oI9J 
ziqFtEeOL3xn>fv8&H8UKygDFqO6#TS6@t~rq2MoYU+1F0<8S$FjkL-x1# z^O_BNsK9!@|L% zk!2gpn}dC~v3-5q1HizdveREt&+=M;`pnesO7-CT;QP40^&gaE2dyk)e7hAu`(975 z9(l<(d+TpY;(MBN96#LLKJr)Ly6S~X9bNL2lX27Y-ULu;f4~M-JwMU(5*$1MC`lrf zX}AMt$%l)mizSa~zJuJ-?S!kwsV^_)KRaKwu)Yd7i6;O-QPKBjNAH8JE58C#xj=xL z!KhIjdCa_gaI!-C=~I#qRf1XmlgQ6PdLRmJ1S%m5k(D6-2^y`B)z{K!sdM`P?AeV3 zyEb-IvNd1_obWz&GE$44l>D2!^QhJ2+Y*bvwa86@f&ElyG-CntkN_>1aVqKN2Kb%5 zoq{4-?+`S=rDm_%>uZ@#tSHaZ0IBA>zt#Y_b=O)|PC7DLnts>NG72aLYBLa@U4>Po znRY4%bECfaFohmjqMn!`*KrOMriF0<2R$ zQpl*uzqpZ@o_zl}pw@h>1vr1uqSHg`Ahnqzij|H=*tgexB{qmauf#fDg>qFO^Mqpuz;wdszWmEVj!>$pl6&BBL08U;0gsYXtj2;`^^5@>)c@H>vV@R&|d;;|c z_Ub3U34Z|~r6Kr@opC>ua)Qt!!&s% z#jk}^dd$g80FFHyw7$Ql(5NoJnE6bCn`EpQ9AyF2tp+-7ziVOj`=hIYAg?3zXrLKF z@%g69@wRi>-?aN`t|!`e3Zn`YL%fG1V)Nf6@AYK|CYT*;4p2($bw3>%^ClE;X2v=X zbPy)(IO+YhN2HjvB%#Jr^5-3(8>h%E@OoCu?T@C?bNWPetZ#-^K6EteJ^idt7_vih zRfXs6&ezFfCuPh54Vt+8LdJ~;$=jdZct*f2JW=wI+wxNb)A#Wc!5?zfAA!*a0qVzM zyK2DB$8yTH4=Sq#irODZR>r))CI0Q1Ef`)9$h%ajcWm7dKK{b1VYlNUJo$M{=OQ4k zg@POO4({v~m86lrGYNoXa{>@u=8FV&cnTbZA5pXUUE5p&wPO>2npR=q#!BU6ltcDk z*5lox@4WJ`L@X%Hao=g8tLL4P)d9U|gqDh%|36pb4pB6#vVz*%OsNGaH7Y0jjS>Qm z(>2dR)+!nfteg8R9@cGYRBjDfHp^d0gxMbdKz3Z~2}d9Z6lEOqn32sfipbuxlr39iMppJ{*hIhQ%XRgBU)S|{ zzrXK(|DBsUuk(67pO5Eb-XHge`HM2Z>l6fl15^Iv+{g!3|QS}K#FV%uPDq_$8mXcV8__N8xs~xV@btD9h0c@t?khHKWTlb!GvHZ`#|2jP9Rrr% zKyBZ{q4*~?z{(>VvCmdkhG2hWMi6-#o1+*JXK08#v?TB5psqrRE;5;&Bci z7+5;LP^`m(i|F4=@Ei;jjyK!~leZ>Y4i(6n`yCX52Hy-lKP!}M}TM^(6 z0KI1iOKov;=@{)zLJOTVy`%-iEPl+<+{!EdDunhF14O*n59~|@)m)%-BkrRRUlYi)&nmqW^jIH5zGDsHP-wj3|G_exuUzeE7sO^?2&j-l zBub;bz5<$1vLOv&&8K(WV2h;Q(|~a2V_)+9Ou2(j?Mss`^(#5iOJj!ZMtj$pO1WN4 zWt?p1eg$odcuEf0T^Qe!gBHLE&1{$Mxp>|1s;E{&^IeGzCGIZo=T!D@fDHt6l^H#GdG~#99qbVebajFP ztYiPUI$3z2<3BpP;)YFfP$Ds8zEwXCtao}yyM`l=%!Wyg=G!B0;IU3!sC9w#wLx#| z_@Z9Mp+1{n9pFc40W8=wYQ1~!pZHYXeSjp97jXr^6#e)k&_;D3!Q5-5`gtu-?k&Kt z9PW}tDzY70#mEx`*4hAqC^FECelJGxI!}Nf&IpHv7C32aLN8g^r*R~z68os@b=Zj|l?(c;Atuszf zOY-?>iyY|x;$Y%(Zbxl`etQBm-BOBEu@a)yeRr13w>Gz(oRwv-8je#o3|y~_!Xf=2I#H4=~bs z+q-AiUaGfOV>fZz_;2DYkJAeVfDfPJ?*_^b_Zm1h`EwO%r(>>BSMj`b{Hlhus|h3? 
z*k&2x83D>o^ZieIZw!Z_IdL-50G%<*Fukv`@P$Ah`b+7zc7_nL$1}JF3jp?2g?z6} zb7oMWedQbvdVnA#Q{@{Fv=HGw33hUC*6UtJ8#S@pehAa~yFo)bVw-^WJ6qaWk;4P1 z0z=tPz?W_TY;n`eaY1q&R49E9$fP`-h$1n^b0EkWE4?eGCACq>5oy+7Ie!9$2>^lS zSTX#--4o-bcEebN8rV(<<<03|;Ki_pCaj*k*Zq9>;zANTG)}?KJ-6*{_FJVNewIO= zaqgEL^C_4FCVBe6s0%HFs{I(6~IImBF;l0XXp(>bEq91RVM@MD@6x6=*EJE@sYfMoTsu$xDxxgR!^=unz&k>pdmLM4}M86o;JK zZpprAbQq#E_QH37c4Z#)4usbhv-owE2X6q{ka5^3U|&r{=goZxj_3s5UMiyWWKsNW zUj7mo(4lEsIYEy2I)`Xic?|Dqs#9aC0XoKTr@~Oz-aX819q9&UUU1AMmE$(G0v9Qc zAZ_)Gn@6PR2=~#p;;JH+{H0!LA*Y5|;eK-eY_}r4`MB!PDs0r<_a5whR*`)r3IHiByp zJ4_BkwI8#QyT)^pDvt#?>3?cyfFF@c%=kgk3>-83y^B?A@gy?KrpS+Q&+|~91J)Vy zDPF81xbI3gpFs#KQZW-l!njqQhYrWsOW49L&sHYq2=*0(`@{y*Emf%K>~7(+X>Go* z012IFJ!bb0I-T{2_&9Z-S6kLt?yBVUN6`#eBCA->tG6A;Hi+8kudl~e+$-K=zPkNW zeL(7+PpBKX2;Pv$Op}!{{h>baf%D+*5!NaojA3#mC^nKXsiH@i#$G?KquY=q?RgIb z+WhoHb%R%7W~^E)YPKLU)*dd(@R-T08c-nS)M7+bmSIn-CG7w;5CRb%vry;9f9L(u zDj~Q5+$qr@qqoN!F0s8o?E*;d4C2h<40FR+%Dx$On*|H~T)6C;oZa@EUs=h9;2ezO zgB#7W3u0~Rq%MllvEo_dPG$gt*EiQqx>Sx81)sVfI_alZq9tmpc$ZtQ^A>$5 zJ^WBas6&K9d@_55GWQN}Sl$~IuRu`|8bcbO+Csa<8XQk9BkoB+#ajj9Wx3 z+gf%dGE!3gAvCUyibKm=?GnF| zi^MxvgazMj^Xy#hd{lws8sK9$HmuzmP$c6lW-F`EzrZMVGCuQjtV4F>ycB7exSIHw zWx(yNw-J_ia~{9EoOYw=L*!V1?>7lO!ydXatfh8QQkq=V3!WT%amu=PWI#7-rv&@W zE_{W{f%Mth3@ac~R5&+jJ7}pn?pRQiUmyJSEEi2H%7I0?$+P7B;n4Mbp4HcYiDI=L znf%W zj)(g@=oazog*cg4#A*_+Yln!Q26Lh7J1QwSeiO7Yw z>U3BHl^-V*yR4+dUg)^m*C23Nf@|(lpgj81_Ytr5kR|^oAWas3Y0kY^hvDG$h*#fn zj6Hd);SLTtlS0n^B!RQ**s9VDPG^LCp1m=t!)quD{EQE|4vf$2mi={T$1wgQA9@W?D2a{{e6^iETQf_y^`=`0s&$ z6n_#{j=~~FkI~^xqals4dp1&g`RF2yBrop+oc=AhUzbisSGE zL}_0rwLA?@X2Cz2v3N&Yoi(^nSWqVLtHl^#>;cO5`=0 z*n0ENgkMrNrjgx=O_CnG*GrBnowL^zkO_&q5t6t|MuD*xZaJB|$=uVN4Za6nk8>3H zSabHNf=R81QqeXBL#(3%*Vk!3=%(cglq(D=D~+onebyl|q@m~Fc&S+dkKRrBT(}xH z!}v~txv#o*c(h35e*MK^?mUexoCjP=%9Gl)yX8&xW!!uvq??JZVUn9!y;ldJ6qVwp z9D$hn+i`BSBD9}y>v&50v4d0tYE)MqEqMgr&Q_geEf(dmjMZa3N4 zU3ZWh)X!Ia7fL{Kwyh9LYSTt%ss74H@5S7-w!g;8AQMS>5jaDFm!Q-^07y%XWJXjT z|6%@2@V*Z|nS3xpVMfBI?WcVBMkQ~o{+V8F2j>2ux`C#;c$90RRjhF88_rwN0Lum9 zD_1OQT&Y)OMkOw_h*A`tOLe(tr`?0~b)`c_jQHTYoVaI^2lfl(oAIZ`-U~&rbamaJice@NLutC0s*inbo8{mYQPUG(#bYp_W0uJ~9v=?1sWdLwX~ z&$ZrmnAtzmafMCmi#za)A>ee@N`8ex{%g%!V!jnT889+d1@swrPp&?Lw_scf&H+?_0@W?$)ndYBRP- zVjka$PIm_bQV_(SjFe(!2%l@=6-&r?&)_n1FZ{5LplE&}G0f+L=cLV_C{Sw-a*mz# z>7UNR)>%x|19leL8C(#{+sbBn??hy%g*p;JBI)zb_7&`*E0Zp@TICM2Y7U7F98IA*>XUZqD3>3Vr~M|Sc+DYo;zGN?uLDR5o4AJ>zNsdr}`wA0oz%4L8Y-K zUyFZZxwBDwyZVPJ}9vx|dph z0MEI2J1cEVrs&oBL0~MEK(+(R=cNO@S?4UjA;5fKQDa26W|!r$Uw2yO_*dwSf`inA zV?3q2mbfy?@K7#8oRm)1!z?Ayu&o(@Dqvr3uq zTtD)}Cq1<^YTajQg+M;RfGKPSPdt0QU8w&C0cUv(R~!q~t@_Szzcb`*UqYWp|Pf@aPp z*kwN2yIYt1-4>}bkd(GoJGB9_ubn&HRYPiOSTx0nwf&v~x6un~QuM^ZNEOS;H7^`5q{F4j0a3_@?bW)_Oe5tkx#`L0ZSGS~D z&#zgLvpTIKgD4DEAGXm)e^=8s5^1nDIRTu7HsPK^-n$>1(*uZI9yxiIb?Cl2r1dXWm#^EmEy1F{0yX zOzu2m^?jl@+xA>~WXVIurFARDKIvg^D;4Ri4Q6f+EUEb3m!1~A_Q$!O%9r$toMbm~ z=dPz~8!?ybd`_;8aIct&t4R8XwBu65YmZ z83j|SvkFBBqCAt80yVQ>%V13hr5lLzpdV;G6n3`biIGTaJ14{bO0a8#B!b zuju9mzk~dvwEEPr#w2Km3Ek%)2^2%DE=n5d;Oj6HbGkYf?PE$+-YE;J)M8p~%)+@8 z$i-~t<*@?L7t?P<9)_!tHtEo#Ijvm{v#vUNUm$MN0yR+9O&R6YsG#s#QClV?Ee#%N z6^!G_QG=g~CR3YYpv zx^TN>Ktj_NWiT!p@055igvrCms?5yPVpYrNwv2lB3`@K_hA+h@{*i}Ax*oH`p)R@e z@$SrBY>wub;t{`Cw`VAQm)~awVR!1>Oi)3N9qJy^wKb z@sZRbg*zMHym2v{<;~pq(+{bpYm+5I+)w?>fRHU?Dlq>LGCN4f5|S1;Sx|4dfT{fw zGDdourkBUXIc95Yj?bcDyUhQoS9}_iofwWgdw!G5^EsB-l)7R^Fub9W6BpZ{plR_C znrDNE>L7Qir&8yn7>!x79xha~2W)FX`9+EOm-{)nWgdb;5%W{sg7;$`QQV$9C-lQQYi>$6dcsAl zQCjc4Y*19GbrG)gh{_he%jI*tvK`oX-?T4Xm~#u%$5YV4BXVT~y_=!JrbX}E;+s|i 
z>c&HXjpR#Ei163DwLa&M74n7^ZgZ}jjZdtLyegZ#rofmwP3y}$ufVa(+WRThX`~`w%M2AL9Y;CR) zIf*uZvSahGR}5@D&N7c4qj1^ue=Bps@^W**&+8 z_HW793rr4*fdPb*_un#xug%oF*(qY{y;HAYp5Of;zGS8AF_i$KYAa#lHzbHb03|IxyqYVo=u1^;IT^BBn=A*1};^Y17u)86!ZT7gvZ+Ju11 zgU^ez#i|8Nr6%K(jG?59l9t16cGkGtpjLh1DNb}X`$TgCZj*-j%wWZOdAFDb6a0yy zjJ*(scx*r;iz+1VHHl6j3#cP`&(4OP*{M~JPQL?2H5KP)QE{c&oiTFFa^=3p*M!eO zD&@dJYpoZ9UQ%k%w?t$@6Vu!~?t@mZ(3YF;(P>YREQ%V@G!Zl#%5jP&`LDOpqJ?YUw%B0@yBNyMJkI|HYk)|B3Jo6I~Z4T(LArV(k z9DtKp(N@&e6dM|OT|P+zDoZE%PaFx8fxR6s1w&EK>JmdI*6C|MI$MYoPHnD+Gq@7j zkVLSr-R_)tcQsuF{&`$u+KEshe56U{YlLv$O9J0HC&rqVX;9k}9U>T^I2y#qM6TAg zQ#sq4*MUh>ZfYj7gGM9)F|#M?ygv)Mzd# zbY3|h-6#(*1fn-vob6AYc$N?lBKB#8OB@r~U!L1ct!B|d!QgR?qJS5 zhGej$OeZo+dmRzWAF_{%bPzLgI+GfV7|7ANVXF58`I>cNUBx|po=BkpeB;%nq6^No zpgGSeY}m1?ECwq)vd!eKB!Vv;)Vf0-8BJ=N20~MWKUbDLQsQsIVS53bPJUBhz1_L^ zH~GeCi0FHfx`u6Axe1vfq4gBo?%Ja9)It;S*+j;2$iu%5 zz8$^pav#i>;~JHx2Xj>2%g3Kom`bNhczke63ICWdV%)0p^8Fa)@#}Ma1bZ_^ys+_7 zd|UP}N&4vXuM${yEwh+Ho|)XWb(pGkiE$*08B)V{Ly5U%6jD{jys={%cZ}x;{=t1y zWz*ET-Oq$qX^#MO;Zn^?t?X=ZqS==rf8$(nUGnx7NA>XQVSVN7Z{Wr>}n%C&xE-PL#&Y3fq3S7owBkE4UBg2}SMi)eZ%bZP8{Y_Cug z;16ZW4$3Yne==g*RWVAAgl`roFXHSW13!%48NMb|lm#S(fPUVdzpt~l1Bc@zD(Tx$ z#>xE`PTBs8lBMQxLeuj{pOH%x8@+CqZXD_GR@Ib(TLMTA z{8iZu5CK3%(akL&IGH6-ac^z5%I#DQ-Sm@}BPb24g;UDEk6}ZrKnd7bq{k=eoq!)JB9CD;E3MBt(+m0qO^R44yq1W7w)PUvpl(0#OY&=F?!KH}{%l0#{|g3W)4u8Jp<= z{Ohv5XwX3qWB&~b)mIgxjBI#fS*|1XGycT+tIzJ5-nPCFAv5Y`eps`59E{1@O4xB zDb5#0Vzs2|bScF_n&Or1*sesACV8auK6YvO-x*)keyS^;cb|vy`%0Nr?HNZ}myGQA zZ8W>vLH2U|YkCTQ2G`ShpX3uCRQUO*n*?37s+*Fga;8$*9T)&p3yFRO@GH$AXKQ}& zsL1|_v=!&tG}!0Z!n}?Pj}R?agQfaBk8??CS)VFY$KM&dQ>TAoBBd2J$e#M6d2?h9Kc*noOx6?$` zd2R^YLVg_*Qv5KWpiuO8rFc*ON%AzGZqlkHhhfs8a@nUL1UxIC8e~e7{h^JkXel#* z^tuv-19B3SY)wCl1a-A~TV)CHKIhTx1XB&Dpu|+Zrn9LSRM8a#FtY{txBdAiT-Ipm z_t})UQCKg*aG9EO8Vgb#dzvQbmvF1g*X(Xf6tN%Zb@t0BAE}01$BAq&xu7 zxePR=`m@$=AU$u`n-|CR+{x~fD4kHO|mzeroD!Ha|py zV!;AA>s*(zm+(lI$=nbaNyTsc2sX7q=xS6m=b7THM>k*jUWLJ+SH^UIt}0lCA3t3PIY0w1yMWk|rDJ+&h>bzB<$!(s->8>4SPrHY~P+o57%KF*T{T2Dwfen z&738nM|Ml>`sgjlLjHgh9Rv;hmlphgSV;L#i=E!~GVg~AtEKL3AxE=n3@6aSu$8Y~ ztjNcqX`!o!V%?Y8)j;O_8VOEJ13OJSO_n-xv%(tTIP zEn}(9@-h}6*_wpTcCZGrxDsV=*COgu$5sNaD5tfTZ@lDeyxu1BEr+{fMe6T(>yF}1Vt?)KqFk!YeA^$F&O zS4Oh0h3Pp_(>*~XgIGbFyvO~DHCF~)f*I7&>zaufo74z_^<0}pB;d1&;@xrWD%4LJ zRu@q2kEhBgi@M@dw$FKsCdvq1YW+GJ-5d=d&Ifyoa7BDGIi9DnnDDx|0;Df!k{wX4bn`!}XreO$L3` ztM)~oG0v#a=|3pC5H3wBtu5pWn@2wB!)H6Mgi*%Tp}lK5{?3h?*Mxat?vaNVU$fDs za!>#3xcbSZa#HOwVun3tjBt095qD}4b036KyONjy5N@obK~)ojQA;EsJ!@iMc2WP=*JdpC;NI{x{w>WqR{r$n1RKQ_s8n3C3boR91D3I#)+LnEblA{dk|tFA}a zU!kH9TX{iA7xB~{7j@oOQSB^+m`z2K3LitcYxHvgsZnGNO1PMxfS|k?rB9&a5@B=p zsXoiK{5F@z^9-{97t4J`IXI54SnYXyYyv3u5e!BN*|g&Y3hdyV5X_X@v?Of;+F^q} zjUJwM+ni)O#~0zwGWcZ|NIl2pZ#PRDS)$H3rZX6pNNhOD>{Wn>{<6EJOihi~4D9li zO$mHz!uI5#9)N8 ze)|At$gyekEhS8FZywYZqCMJp)EPuE2mWsG^=0Kp$bs-Ss}CV8DNfJte0~vZOOiXi zqdPoYka3k9Mb$8RlSWIqtTFiLF&sJECo&+R=*lEUv>D#Rc_Ng!{}Td7hA@sqEk%xL z3TQ9Wh;vZ0vfhgvnznmFxB1wmfBw zeyJLO+9fJA;aqYBu%KzEZd{}2)bR%_hy}>XnQQU-d@dA-xzQYX)nyjA}^paWGex z*ITQ5(RoZqPJp5pj|ZzFbsZD?A$+A>U?op2C~*I*xL~Uy1m4-?f$f$aq!AgwG+sEI zKTV>fPI@}@md8W$B4)v`-OeeaKDXHGPa^eP+DN1~Du+%%XS5`F@#UNLRir&P4)nip zNPM1AiL3EUbPwYqBylQm*%+vJ zcebub2$#S{ZwH9z0!~KXmTI=@&a2;9{gehmK5B@;=vZ#9iDalSp+7R$&^671d9Eq7 z>#qTz8hiHq%A_^drKo}>S~zf+tkwIH>sLfq?LRH`xR11rHF4ca@R=70h2f(paW0HZ z6*90YL=^h5dF=B{h_y-+R3Q7!*|zs7TYC^UEzGm!iUzd2cg$*Nj5usnNLQ`xg; zp)d=++Mdjn&_vGb@fOjFhi`fV2yigFK|xHSKndIoU8~NIp>a)V>x|{9naB?Ahh-suxYIJThiRdjgdYqc4iBxw+sPNeWWnNUw zw3VhN3o2~B1JskmUM=v9f=SB%WuN{qRVZpG6-15+0om_9(3))^efr8)l^BDEoM)k;eoiUoR52GXoygaUXOD4Jnc9{Z_h|r8n))i+ zAl4g@PgP${+GaFF2nLK6)|zY 
z*mV_m_I?=v6d8yzVdFBiFdr|g^5*S0_7YDh){7kmkDXb{5rK+Or2zE_uaZ1I7b1wK zCeikRxRtf1MB>FffG)*-FymDlRenoP*eH>HkmCN`@jglP#lo2I^X`Ja<8e@F`d)z& z5@i`FW@q+pu(?MY&oz?PmUpQhMZ>TovDiw=n4O{+-6W@EUed5@a;1-e)?o>9QwgdD z<>P`SoH?v(F)Iw4xAS6Y5!QRBJU|u>}n686;+0Rpli?5h?BG9cD zKxB2rnXC&YZNyXHYqJuQ+13y05#B9Kxkg6erFdrn3+i3po+;oUbJ3~daX=V2BhZ+1 zKBO5S9g0^eFE?!KvXJojt9QkCTqZ518;lWRP9a$&#L4s`&uuO+NN`UOEqmV(`iQA7 z5FO_l%K;0BS&X6#sdZ{|1zi?g&lLg_C4|aPZj#+KFm8gGhNJJ5@312SoEftp-X=kO zV2)BuIs%X@9^9%ThqyNqaKSC4Kt;v3zpSh|Vtt9Euea(TPun~H>qPrQLGffR0weOUs#62U%GgpDWv{f~2asMt0`7~P zI9}<-bx~we>!tM_BoC(eK5+l)%J~&G-a!G+oI2AJjmI%C4czjbz5NNPQ$xqCk~xZJ zBzMq8i1Kx)6h77z^6yPlDE$g*I>9C`Kg4gbr-o+xhKb)Me^H7`=PJBrR0&c5oP7eW zrO6%uxzAs(3cWtW7o)OX^zCPHp;f zshXK`Hw&e$8I-PR(G;K3R};~gGJY_yZ)z*;Lh0;l8dc`+3ulBE6oa(&t3^HDWBVd| zl7a>Ll%fidtBx1*iuhX^fd!=kBtewz^AgB3HA0ytq~hI>KQm48CB&u6x>7^a+lSe# zOr#10CPDyec#+{Hnhii{>ih}}WVkq#sBbw}8T4CTtBlP#x1Ox>L|$?HTw4%sZm38S z1HoA}+fYS=0u&k_Ww2VTBqNa=qZ>ow+Zxb7>w29l5L>y8cw^Z`{9b$6BCtAz<7N+E z^eKQzPxsm0A3+54e7n@s5Pz1zw zac!Zzsq|N5b9AmwsJwo);p-L))tpbTo4s5sa_Q0K{#>BleUfl@ zul+RyIln9bT!VTiX+$KrQfHt@w~1v8?+@S673bUW*KO}U;%+|EG&|>3hGYl%b-y-M z3N9_a*9TLY-YfdIUsndno1Wr4zYwc%huRLXog@4?1Z6lZwY{owU{V$m8JiZ%^ z(e)8C<&^+mMs=@&dt@$#SDC&?Uq84MTZmB0ft_yZ-2q5fki7!X^3pPv`&U8FBXRCN zeIKE^C%P6_{A%&pcDiRQ#X+VFKfZLx?v}Np2=+4VvKTt@#%1FSF0hjDe22}Hed2qy zUam>ljeKu~QFCga7%cGbGm$3$7*o9k@|p}{-`Q3^1$#dwFUG>A9u=6Y>G#>K(9HE= zoLv-`K0e(fuF>A$p_aexP_%pql$;VoY6D2ieL3JSP06WL-#q<46WW$NNkUd20Ub}V zu{~^t`?FqD>v;G{b0^IOi{aO<^X)-^E!)E>_SJJU#PvCIcR=JVI(4Cy6O2N17z1$P zW=6N3_uJntfc7zV`NN_!)JB?^wH0 zh{tg=5a?01I6n4MKk--#>l+nYp$PJA^O*t$oBD&?bHpV-I$1S`unYF@#Mnh|Ye6j! zz4`$J#c*TW_|n1ayg?uu4}q{pZQ7RVgy*oZC%tNA2gd;Nwm|n836_6C{X@`(b`c;* z2lJ`2^#V4)@iGH);<{GnF5s3bG&Z0sn{zXKyv&5=0Uj|S`@sS|8`V5=kfo~u@HJ!{ zK0s_F|LvX+&_SV0%XkHdaAZJ>2%L;0LGzCWfa>$;FQHQxn^U|Jc$$xW8-yPpr&@F` zfI>#q%C&k)JY&d6KD`2M;!mB;$l1F+`Nri$4g6(u5#QjYck zh*Aa%Z{0c>52I~6SSfINg?>5UmN!8%oIWn5Is>6htJ{`DNsw4gnCX(U zwa-nfwYB_w!;3*q1m=r`y56F+jQp_t(46qxMQp}9YAea@&}_Ixkdl8lFnG+R1ok3-Y}&evaoGsWzehDk%J!RzPCpfIHr>_4u8T++f zkmjJpg2c@-dgD1zER5hidcT!d{;_0n6d}dMK)~NfvwAz7o!YmX!g!d%yp5~CK(#>y zB36xHlB&M>ik|~WXC^rcU}bq3LEA{YHI%{~1Dyy3q#uFIH1~?L4O9#K!U9ZVemMW? z$;om&HvMlewoCzD?C>_rMliSs7r=`xlvV!O#YTqH!6Yfm_;a36W++)FOXbWU89lTS zT`XTgzAvZ^pySjGpnj<4Fy7*%AIE7AJ`qA)9lW+sYYDPPs3XqRYwa(a@b`~ie&E(! 
zpyf#A28tvBeD3?ci#T<%2N>{tYWG}cKNCOsa@fB5cz8$gHZJFY8(um{a6#8?p&E3f z_LD-L6cUj44nX_2cq%|zHv~e@EFC%q{1mVm<-+w~efIvplwYXcN# zOE5)vUb(xSoZfv=#0NOkKFrd1C=BR&n=t_Cp6(~u{hxmY{1qQ`loU46+CKtM7XdvK zaEemCOQ%1CtBs}OkMk0hfsX^qPq2qj!yDAlT@3JE^~YvTMODq98Q>~RV?FzCN(Sn2 z{WC}R)dH*nLkN|xV0Y}?YSVBE?Iwg^xHepW-Rc3GQ$-B55r07J03an5)Btf|Exz|u zHU0Z>|9*0?BEYL5mpy?V9-zQ0_s`g#6Dp0h;;?glmTWUS6T z)dcv&;8TYIn-xx2V4?qym-x2~ba8;4(MTEL1bS5l#ReV$vs|ci{$)zm=*H!LTXMoEZu%Ku z;yJ;}WLcuQBErl{yI|m`O35yUoBdtL{`-zovVa%D;s^)2JBle2 z|E&dqlQ98iTv8l-K(WJ!OR)sO1`~zM5|wJmNg(yQNdL~o@ZTQVhy_+7+8E$6$|?8~ zkDf3)n78rD@l0kP-x2@+Ig9_exBxY9A6cU6y%T`b##c@+>ZqRt+SC>7YM~WfZTjzf z@Jo;WaZCY_(wm8>k^yS*3P9+>*cuhkqKw$^^rK+_@o5J03>Pl{-YWr7VTO{JApwqw zV1L@7r{xG3*%Fst1RdbD*Ao8YLjKbhWpY8DeWz8$d+jes1e3($k+g-Hyd3$`1PR`vIm>Hq$;`XHbP@6r)|WW^?oAOLCq z!0})ac1Q@&=bmx>R~z+@V~v2g57@(|V}aPQYqDG>v8vWoK4X-_%(xP>{o}v?`rjUd zieUvN>2~cSIM|sR1R~cJB^a?2R?fb<9qkMZ$G@-h|KsCdJENzxGx<)x?_22n?03J< z1WuuLS^uj3|I}XzW$`1<(>9XH?y+bIr zl%-R91tjj=6M0^o!q+zCn?@~8F{;0(v>^LJ3|*pT?r{|$D(rZU!169Yk{@+0oa-Mb zlK(0;_eFusn#o9uX#wsI4wQ+2QI<+1(c=V_)q7OA%-NHGR9|ZRc0aHgzqhhR0!WuO z833`xtlILfA_FvxVC4REIU-BafTE{*|?QUTDJhvAK& zwQcl%h=$yl$Q82R)uP*Hz&?8mMRBqzaUf&-TN}H~1nD{ZLPQ=ocUGKU zqSE_sR|gs&(*NGq|JLY^is2k6#JaIp5W{r2{~txL|0=$;>fi-bp1xTECa(UeJW$kA zCH40Roa261r`P~tbw5|d1z4iIAa!d_$Mf-zc;|l~B&{d(<38sQD6o24ycBZC)#%3W zU;M4T17~yZbyN8iceVQyvxgy#_5Py0iMR|IPi?vm=9&C*>Qboy)%=2ywtYfJ-tKDFZ84?T@b2z^O;-Fv z)(6nw6jcI$fE)ttLmYHTxqwl<1<8*%803fl#%k0uK@^h^3uRj6v2n6FIMi+=0n9Km zN%{A_4n%d?4amjeW512`-~7ox#d|;&d~c#-B!yFO08j0s{WdjrT7#Fd_18CmpQN29gyw_G&Fu@P@gB6AH3W}s6&cUcZNm0FZUpvEN{3QGRFVJaXj_&LIkD?=&ArE zONqJXUKkUq3vKRV=h;Ms;Q=1gOR|_x!gVDid+AxQg5+_#e{x{_hd23=C_hS68b(wy zrf-Lv)cf(;$NTQ*Z)9FTd4$e)Wqu+=nbB=jMxC(T%z2l4?#ic%uFN}3o_OWY3%}(~ zemKt&WCVz~SpJvkKw)zZ#N!nHnT8x?Iv;2_?_qlF{ji^w>8oFnk-QKNg;t#;&sZ4k z4OX~NJFda%&Zr`QIQ(3@@>6Ei)M~JW%+PAaHViQddc*6I=I(h0C55!U)XBnGn6C%n zL)zoJ^u4H*P>;VS;{UPx0|t2Y*Zqdy?Od+`@RQ{9>)CKkd@XO@zCnWb?rG$T7 z?e`@`{XDkN@LGN*crq7``|~4z(_`hAZF2t(P3{w;ASw!|({YyAVqanG!L)Y^pl6uo z*Q{WIHk*jzA*d9z03NE1uMjB+)c$BZ6!D3DMq>9<(zx}njq?hTA0;TIuQQMQ=-MXe z(Lv=EQpo@BXu(VaO9-xG$fs;%CLNu2hyHniDsvGgk0HoV2!9E|buoD9*CI2yQB-gv zR>1cnto{wKXNUqyjSI?TRZzbPs+biC2z3GhBI?CpIyB6<2mC*?Az2392)}uwh}akZ z(kXyWuU^4V6l4WC($vh8982S^cK!{gyBQ&`!%7{ckoW3lSqlF!7{+07+=?BN_ zd!&+4$Fj}4+P@AYhC^@;r_gJT*Q`Y4p5w#2zcvd$6#RU`^Mq?DRI_iJ3x3vFXx8q} z37$L;r^lhKT7E9t9dR;kpG9q)t(yZs5UHBY4q+}Xc4?ouxM7il+t7_t%ThJZ9lhSzFu+5?|4;siw!$%HIMz;y2fuGFS z5oNi{@#|8Z<3^EFSq`7KSn(3gtayL-m-j6=hZjarMrjqfFXO}AHYAU2-8=}E2| z`SEt_O>I8|lNS3Fp1`_AB8onVtYTPqoe8}wFGPVku!Hvr;AuIkK0@6EUx!P4L{Irb z0DIHsp~uI@CYER_OP=%0R~OCS2Dmb9D_jOWR&7YY%M=3P7k;c+{3JT;sD4>yO!`v;Ht@EFq0(!qU~yl8fo}K>>epJOx3wbZF_M2>mk@UFgsWSX=+G<77^1 za2ZLfv9r_Rq+3X~v*=_grofrqr48CR!2byd1}PxK`Sn*2sZeRB+Ehk;R1^ul1I{>Q zAcAsE*7^rCc8P_(qBv~-X!_>`xS9L#S3ox8uI83tWF3lJeijl8N(HYOQm$G%qxD81 zIxs@vtQrD|s0LBlRZCm^D&Fcp$pQIY@SZ*^Q7uMG==$T14(AeZ=~8c{pw+$;*iI4ZWgjAFDXSWp-bO zwqhw=aYF7}X28{0$5NI$but;scESmK3U>-2ML=8Ap3KmNcrH!8W_>+VFWrr z&XOx@?~S{zugq3_W986#p3T&Q@7rSg;ZF?~Ap*TL#3aJ%eW9x7aZ@G8lu}6pcll38 z;vLsb?Lbk_dv6pI_>KY^0~JXL=K<4Ne&7AbErY;^J%F}PT{2-SKtvTl?tRaSJ%Ip9 z5F1*c_TxEsZug#C$d`ZZ74h09M9v;Uy!&PK7L>f92Yj$4f#Ad+ z?8lQDkq~&O3(=l+DEh8@eiJbA%)XjF-t^NUV%4JY0)$aJy%nu2Bz6V!W4!&*#i7S? 
zsep5H9|>~*2JYLZdIl5$r|+Kta$vW%sRv+m;yI{eYkIrLjOp8>o3GxPlmG?6hl(O9 z{XSVjX(1|gJK^8ne;U`#8-WY>x=S6;W9@q$d0KyOqj^wH7}OtxORZ((dd?qf4U}gH z63sk}XEYZf=2rs(7|Rd9B1j69;0{deN2a*f_Hx&>CadnldgGfoGB>bBnZzQ8Y5^}# zGa$+wt~gd_I01;w(+>H3X5OMo-Etjiwb}Yl4Owa4IVOODu0@+2^=Pm9^)dg}A(QMp zXs@~qH6ExdXB_m{pSO>OIAiyz$ZnDV)>^e;cfk1Na|Hw1{ySy917b$C!Ob>>LSQf0 z*8xf%SBWPOv|Mm9`s~lf%f=q=Rb(8m%L^@s^i^MpYS=!vk}^hCQR%}x_n|HB0jcA-b|nK~ryK%s7o;H+ZuPkX@|_^y<5%Zzo}n=nGzA1=&XN{2 z@YVWHY3y5oNI2*el_0?wrX?{OxQSRNsO(yMus=YKZ3cKJT+J|cua*D=r(7M9jc$Fvm-o4y_2DVjc-zI^Ot%lK%8Zcwz$!p?ES zrx3UziX%3dbDr2m7Hp|+Qe?kjGWM*^-flmrJ^uP=07>Ze8sw!eqM`^(zkf<+u9*+B zT94w58)shoQ@ucX@(w#nnj9_{ZaRT3=gQyY#HN;C0#@x(@t?#a8-ydu^asF}@T$|i zOHCWstXAnLPp$-%NF+SeQd*Fyamq*hD zl};csoqd@VwOa2X_A&Fd2nX}wKu9y-*quZem>IkouhSo?k1E{*1WIHOU@r-D^SugF zzbv*0`eh5Rhi>hZ_{uQ4wvsoD#aCC>E$?{ZHgNhlTm}_hPR9t0MILYVN%WrcRq3xV%7y@rKGc<^`>Z*)tYRwbnuSH z#wMq!+>cqZ7BE2D)ZyM+35VT5+PL>4@bCic(u1pX|LQgYXhV{GLCL_gbKiq0dJSa;nDD0hX;UE`k0vKIuZcnm8Mr zKo&qAKYp%Ym$b0zmz%+Y@*w|Zt_t!;&lH$ds#bq8@*6H%IjeWwmA=`B+_G#uKzJCi zYl?F0s7ou339=Qjq_SqS-xhWNNrXCv1Vri9;vM>97C7xVZ+ivVRTOwr*2hay(}Yk@ z3X3~A4zW_#6lLwA?T}%HOZ2s0Sb*(tihi&eY1!y5mCGd#MSU;IN8B0Bz-Yrczi?WA z3nTg>3WzehU=pptuGd@sey2Nl9`v}!X3rV}lMuUY55OJ|)^GiSDZ0QuFC8Z>W?yjLbr3L{7X{Ecno1q(Ih@ra# z$sts_zk8l|pYPB9v5$S|vEg28o!5DtfehtF27kGfpI_k<+x+YEuZ!ZV?UUNvPowQ_ zJp}-j=a1+9m+ybAaXo*0H6O7>i{R_R`m;<3H-OTp0FhMp!6juhV1I(mS6u*z2t(FF zUO&_WYS&P}=w8R=v&aH`wjJn8oZUR-eThk;3frU;NM6^Pj4 zB7+js!bHy|q|A_R`WyehzwBJW)>diO-z5SGX%p9-A*bnO<26AevQEwD@5h0}ikhhm zh`cDw9Xqekp@tKgpS2p?S?bjwoD9>S2b5lLm&wQDVjz3OZ~+-cZ`bvu9A|^QVU%Y? z^8pIDk+eQ@*5QV$HqM=o$S!O~av7gfN$YPT2OTBx`?JkknxKzkHVhr?_~jz+h=Gb` zdBL3&3(QciaoR<{^Ifsamfw*kcX?KismV}dum+j>6%bb74`?pmwcZGsLPKD0(cwh4 z$dFUx$i6SaXhY5ay214Z(*)7_Hd&q$nFj3WvBn_9#=|Fc_e6hAT#>o2o9cSJe>-0| z0CnU)fy2k9FQbykw&kbVK8~8RzN4jrR~DTpK|l<^xujzqQ4tG=*8hVH*mS=N|Czxk ze=L%%#;GyKsxZ!o2LdSriF#1}i2)w&*4!;pO6m2Rx#nKr<$VP_OJzljbfsFrC*4tI zp)B&Jzklu(-5l_Zz5vW+g~(W84*%aFG|zyHM~f67_o5dWz!IbcnkF%PNd z=une8!q@`{@>WQdl!2cc@ZaC%Op&b}b^r!Lv{Di1@grdQVtS7)odH~U8h>|Akm<81(lI6w|=G~xDo-(P=H=ai_hBk`|gM4YXI{;17Lw+AvsjY z^@RUHF=y?3L|+;=OVH(Ht8ksAI72;(?lbc9_z9q~${J+W@;9gT*9m5$ zW!L9@EcBr2cwlaE|8yX?@kgZ3_6ShER4r?%n>zt1*joG8$V&~2>ZJ7=_~mU4wcjkl zA=;%ofX(C>mN3$HWZgxx4imhXdTU8;Z{7tDGCnpDc`@Dj`|lDUL0ca&D!22y^Y5>J z-L?Mv9p0taF#?(A5O*usHT%0G`a~RXHR-Fv0bObWV(oMM9svB-1Tv;#68?x{{}BoX zihSDt!hQpPWz$K@A{}tp0+5F-!kH+ddPyK3*nUbg4O#97`hCm!NuEk(f zsqkcf2tIB;nNh<%lk=Zv9h}{pj=m7hNNSJ2E;WCyka_>K-F6H>uk8J=%7CdtKFYhA zq2({gEaLhwePgr$5K3_lkQE5|K(mM~_ltMx4{)b2IE%zjH7wF*?gKad%ApxGi-}^+ z^11H@ju+ z<^?|_hTrby3;}32Ris`VNF@j5{}`8pvG#F$(fxT2jFIcpuSal!CS-KWZqaHH$4Q00n9p06E9Lz(H5k4q!nUZJ1V_8$# zci*xe%5z{&c9ZzrfmAoOmR@d-M4Z1&I2YXmM1sSl#eXPYW~aI}r>5HI;XLCHY64K@ z6oGQ5byp9g)5-rXwQRy4iZTn%CY!gGCHe=ztdmrOhoI$eU2v%&1x6Or&G+?yq%<@( z;LzgB_?P^-6Ub#-|12gG+uApJtq*ImR=K#3I(xv+twNBI*F6KwFabw*Ark`f+mY+E zu)7J6LSEkdkqde4^C)6mPO9qmzX}^zcUZ4J(%$`v4RmNZa9R^7$_DP?U*9@^LVg1X zziV48_E*sP&J+;7pY_#rxut5TG~DnsOn@>AfB~#>m}^x`1L@D>Jj*7}70Ksj7rxs} z7^bDjttdX+%y%)uJpjC`*?mN(yuL8n4vSc*0%ob2Y2|lX_(RX_0c*2UQEL`QB-Anp zgRG<>a2h0lhm^(TKDclc+hX}>%RLjAf}?(+SToN>k~_aMv%ASp0j;xbEh7P?JX+(w zUjO9|#+YAMACGU)@qwG%Wy!V}kEP#tW-O+|r=wT&8VEL4>8y7R3m6a_4qedT4$S|dF{@PcG39^oihDLLb+O2T-&7ZHGKW1wQm?msGKzRpDSlMwjqR%NeZr%WbxG*4SvCDpH$dl+L z3*JVzc*<{Wmi0gKw%tYEY~0Kl*F98B&mnuVcha~jhQ%L#E*@`$`DJ+OyS^W`Uj`yl z(+dnt^J5UXl&_UflA2rrXskF_ZXENG69in8K@DaFjqI=ODiLy2uF_Y)RG=<|aar8X z>k4i#BRnuYwFk(H3XfU5j7Q3It11(ivB#uL_yUI791u89;nMT+GZc8B<2-`;7Qc~K}`onG)Xwf+vkYW6+M*o$k>6akHAO6Jl0gyg50d(^2 z^);1h$z&gM1CWe~rOy34gfZBGY}vlLBgtXolxj?Tt`aH8H3Kxpc+jv|%U{tJT%uB5 
z=`Oaceh}s_WDl}lt84PLBtKA6nDi4J2C7B%rgXmqkfTT zL~+Oa>{s~_PI2udL8!rOfUI;KyA9mpn@s`!5%Mh~JANi!L{Riif)m~#N%ejfQtuf3 zxpOQjKSQ!jJAs&xk7FljBErOe8MG0EEBrMf2d)8rK#~A-!9u|tGG*N?2AI z^I5{?IC2=AatC*r4iKija1KqDFugJ2^y|r0UE6fjHlqeaxEsKsEBLJW9igo`tOP_G zXT@SaY;I-jJr6OI-=rQFcaf?3(|`rSk%(9L36S82NUR=6I|GS;kd4-A{KQ8)=b8j@ z@%}>@6*$d7mNM!2=i5)y8dZ*U_6spourGac+rRM+5at@QQBbHu+Zwo1UnP;yO9mH* zNaos8^`f_KkcY1JxQyvP75Ow8w8lTJGynYzUM$IY*?i~N*X4HnTnFx}2fRmtun>{D z|9ipf1o7o=c~AV~uC&pA96#1{wSOgKUKr`XCw6y(Qm4Sq0)bnSj`?s$kZ=cv$Uv{SVh4_) z7snrfi2^Al?3B+=+?KbFH5DBI#bbcV+(pW#0k7Kyn4~7^`2zw8C|m{9=m)BWakYzX zc44KxMKM7(B$~YgKq#0nyLYA&QF_%KhHN3qbW$-W_(dRDZr*L*dufBP|D_-UfD*sxBJq^>1qWut9aYezVAl_&c0Y%{!W7B8W^hlQL^IZwXRWqB|^hjKseJ|a35J+jcAWkai~<%E@5D6wcsoEb*x z+1E=2_8S8xm_H4SE>{1lU$hQ!XK!4bI}Pb z-2ucU1^g*po3#oVc@7rqoaStd!E&?;!mUBddTs8BD$WENQMQ_@Zw8d}7_(*v6&R=_;*7&~RsH}UfkTvzr=rtRrw`rr321tXCB7u>1$!gyoJ>9G?Z)1AM6ycWde z-mVnsP0u6GXfR*IRm~B+KP4~9(P*hnKTjN1(Z68Ad#k(1<=Xv{6OK0}jkRy~jiF*Z zOr6U=j14o*^5}QDG)@@D_B%z6*iA(na$J_5&rWqM!BwMo`3ZS6KR55U^ET93sZZ}+ zQ606=qX*V%925L=pU zm`dZ0dc!c)PhR(!gYR~(Mi*Y3*J^hiP9M)7)W@t%M3(Hq&HZ!em183po#|1Jj_Ptj z$Ih%VJ2{}F;EuouYkzTrjZE8R!y|R4ulpIjF8iYAjO8p`;u0&Uevz-(>cg)u zA0MNcX26_UJg942i7Daqobghix2!i5BMTWAzjdB$al>Muwt8DFs2keABzKr#wFXBcip}Un zV{~K(3u3z#o~2K@EygUZa5#C{OFEW1-PBly`mwCxf=g-W@e{r2J>ijl)%9MT&kdw^ z8FgXGihKXI(dU$3sLbLBatKg{;V?T(_ae1S>tleG>egsr23mRDU2auFW4u?m!*c-v9;(Fz(w6^Cp=Z+9EHR+2^vH3~PP3ua}ox5S}Y;$6yW z>E(Rkq7fIF+%8Qa3!BxO)&O=CLN4;0C++0~rVm1*+>Rp^s<{I!jhk+`>#DpC8s_%?TK^zIXr&Bh9}rPmzt-1xbk%NhH}a?BK6(6jx>t3jCWUXn~O<;=6Jrj)=F;hrG3)A|8%=7eql01-+CB` z2!COZ(15Yt!cFShoJL*JwJ`w(0lR9A_*%KijRA%5bK4d}K1T-fdKQzZDs2#Z-cMHfEM= z6VF<7u;P9@O>CiQZBL6K_u8DZ(d2to*Y@XjuIg?{ zKJK+PhwXi#JWJmm zP$5Utzz*o>cCu z?`*tZ<1(18b9yy?O+x%#$@6_V$>fpwm)_tBlN?|tA_}Svg>f-U+O*8jrhex!rIP37 zvUF?t8C`5OS59Ap&cc|sYTHRYyY?32Ko`q8EqtPSQu&kJE(Qp>cpewCRtG=1s!~T_ z8bHjP_+BhJT4@7G(IXjJW7#oBu@A|pzX6?jbIqGXSp*OGcG{te0-vNuC>DtokKsWUt$ zGN#pgA=_~Zzf@?8WR++1#QGWf0rQgCK)YQg8Jq9WKH7K*I8&63y6Ta%W3p~TWtNT~ zuB;ce{4YNKnkl9gi^3|@R(r~7^H+~;blhU@loA|5j%rD;*P zl=Lcg%OHit&};Ox=+(~-&Aay?G-I8s%@I4s#cX=>dsv#(DamzTK1_9dK7Rf-A7lB<;S8kDCpUg?+zB)kt~PCloc^is#t>g)mL`fU$S=~}w$OIGfbe^#?V&KFX+_)V&= zE?jC((tZfPvWANAF~i}DF;iKXaC>CLRD0nZR>&Q@G97@De?Ji$JGYdDn2Xio&Q}7i z`Vcy>r|;A~yF$W)d=GL|pP*u3ODcPd&lbEs*3r@Kj^w$sOUT*e&{0Diy6|*hzeU%! 
zBwe>sLsxg_t}L`LYIw5I|ExH)-a@CzSQVf?BhCf=>7uk2XAs%Wdyct%r zQMl(OIj71aXJekCHHPFby)jvUjMK6-wTn+&u~gvfo7AfpoFrooK7EI~GH(dQv-h~m zO^->er&zT(zQENXzdECu?#$Z9q;A&Z&ZyXiu8w^@?5NhfBI9de%Y7|YrmptOT)o+1 z6Uq{fyI0hwoX;;!o2TAd9k$00Ec8B($9{n=6o6d8aj2Z8d1?S5wD2cqE~q6Ms($Kz znHHr<*mrsZ*A=}-32n~b-&{kSZA`15Sj-N)OpmscOqR2BXyoI@-4{J=HP(<@rkTg} zrhBEYbT@q={y_QD1Ipz`yk^I&Ca}E+VmKSzkg*OE{S614d^PbcDhLJ7p{e0w^asV} z)BS5t?7P{ym*dcpn6)ee{I}_XH22(@DG{}W_jw+vc2UN8ri^)aTG<^9GG3C15KN^Ry@N>Ex7G?2je==OdQ z@~kN8h?z}@_p|bHH$V*w70q`}eyJhlx==HJiTyv90-@m7M zTn#H;_RaMQc964CESL&1?Fw329ZwY!8#2=~RhZG7;s6XJh}fC-!ID2svNb0O=?;5rPY1JWm!4^ko&YFe^%^;wA!nGw8cOuxHleH0^bd;j;SP z4m93uX_3B!xdd;BrV$KHqbkjxHR$zBy1ctIXB1~O;XbyCrV4iX{9V7pU}U5Wvo=>) z&LLE4>6Czul0Ks6iSwE=aj1;%2HkQRH0vzqqWFsst|50}EIG$8%V{3R_uu2NCSU>9 z78E9F71qAcvqU8eTE^}wO$VR)fJOOc07nz{w?9qYg>77yBW;P`v$uTSS({*UC#Uc@} zIXCpJaSfy^R%M~N*Pt5zwY7>nm}U(%rm!a$8PnV{4##-d=1<{uxF+-agQi<-+@7JC6vK`@g95SI@Q}VVK-K04>G*{Jlr}lZSwV zxiD8hltayjf!FqBS&L!5Kvdz_O^@v=*gAQQ)r$uLT8iWsmb-eRM%Q3p=5W(Ky#=b0 z@#gmE<=*;UjKMvNj#bTK2b+Ga3N|v=s<9YZ8`fk`(l!&Z zJl)6eR^CJMm=}#f*|Z|nQLt4j+vh3ugBu^nKTwWIuvD7(^1UT3!QYjh$?j3H!63o{6RnC zW}jx`#UYL%?%O+pQ5PN$;s(;eCtP~CMBLD}se`0mip*PDg;x?z$!UT_d*2JUoo!yv^y#q|DyXaQ} zcS<FH&bv!~Nn75<8{t9f1C+Tz7J zIVV`!os%-fvu53ji=ioh;ik~Oh?6n2b3tU!t@P0u-r_V|>hJL6jF}n9Dt{6)>z}Jz zC9>Ec-=BKx%e;)jnq&U5A9}aK%1m&%&|0UEw;OuyC89W})M;tS0nd3HR~9+M?Y6YN zfy(CnS_{k(Qp7Sq5x0d49w4Gde)sKbY4HMK7D}h9=nf*5hf`sl`@ZA5aQ#a8w_f!x z>-c!yUE*Jt+=V=D!M3I97@nnq)+&0h`G>ZXsV{wOJn)gdFB;vgP)1k3X4x$Tfx@Y= zoydeVmrA?p9?g5BP9Eb~Jak?Qubv=;y1;;$xIN9BzqKo4+y{^yHgtHq{!+hZ8eM62 zdOap-)}}n~>Lozqn~vQ#ggn}l%B#>_CoeKh9)j_12UFP>c2JRoz0Y>e2q3B?|M~v? zyRfLJsDvb_qCWH)9Htl!2b8thNGD~iYRYMDfHt@QLR5P+_K$J9}ia$Vq+ z-f`B)u#`Xfk~em)6hjzDhmR=kEs_vBttH}~DQ?2qm8ljU1mo^#s!+F70Pa6(ywprf~`<}v!WSKrT&MrQsfv0mp4 zv|ETNJhDWTgBvre+@PMn@L6bAgQ>b??=x|@^nk2Mz0Gi%8Z$@=2g2^A$WP(-8w=jh2NTvfBa^RZz{dT2Rc@&B=-B1K529CKK z&q++Zm=Zu^yE3P2CEceV6+mkVe?X<@Iar7|J5^wm+D;J6IqIc@bdlwCtZCM39p{0e z^)eds@xjR9XD~Yu7VwwB2oSItzt3ghEEtE7==MaEknc`j99tVgAb+;HQKemuC5 zomNxDlTtRc!5$;NCqqpy0<{QL;0g$fAja%}V;Y8YXLvKnkX91i`>cz5gUQ-CVHbws;YkB;o-K6?^u1sFrMeM z3SGMnj{az4@#b95Ym~d>9XGdv;YY_6fWcyOIr{lOdpJNPDV0ME+qF<=cPn6LskHbw zlCVapj=nDgNEncg4uS~<4@{OUP`S0C*|2r|C8)$r6ho!G>jbhffg8sx!u zI#9ETbMfnzZ_&bb8|pls4{~>0>#`92s&b;y+r>bo@2cbJX3dYStN?7epwrE%D1#pM zki`C?sNx0tcgLAa#BD*#&)`(rQRHFTp=igN>{=|Zr#MN(i~Y_Vyc>-O;@mcP6SfE1 zaFSydn?E#g4Q#mBxkO@N@)02x+r_U#oxZR1kpSF@qqU12@S$Ng^N&+uMFt2?*|zD7t<|l z#$rU2{vW0&0>tg(D`Uh$2XWVW@~>%lGYh_ilh9(kA===%y35gULa07d2<$fx0A!t@-?KzeN^X7qU>e|&%oEqDNE9Y-QUN*3pwtEdJ8==bLd2De zK#xE0K>#pzEiZ;d5b4SWdX-o@i>eZ5_xr^g@D)M*IpdQX`w4y;f&y85U{={=_JG^o zLdbLAc^PC>ze3!G55aSELj%P0iGQySP;&FsfTzSnMVSD$^AWx))7Y~A7jA!7#%hD+ zxa={%V#1PUL14B&yhe`-dX1P^%Z@zDj++ph7nEHmLT<~9$RY0ra77)cz(bYq-(P1Z zT>|-21hjkXP)We>6MqdVChtE=+!`=D3IJy=iO{4qY#CeVBTFFRmgAKG{8tcu0i5K2 zEL!G)b+Pr3xvZw4a~~-ed?9e?F%$9T!dKwV<2^$Y933lr%OXbhj*zHNW)uKEYFhDa zFM&`nFR*cNN|Qv!`TgQk5I}c14%M6^Y>Wa3FI9O`fPj}G@Z2wt_QZqlgc6%amYtzo zr?BSOMjez9$$LthatyGdIKkunYTtGchFMld-sM;ZI>CfBk_LZ)yqKJu2qc3ck+a}^ zRLO1kOms7{va%|V7`!F2;O`d&jA6IP1Zy_bsMMKl$$BaLm4x-#!FY@N6q^&m_4~3A z_1mkrz&)xt?DnX{^8zGxMzKxs)*j?J(#2X&(;w-dq zksR`V%{XRQP5k)}7c7Dyx5&nFIi_gyx^1gXET@SbZ(vG5W1Wz;^CbWWe_4J4S%mq) z(}LUzY{C0~CSvXdsxK*QE_L%w3yT5rZuVrP1wl03fTWozlUe>BMo5rsbL-7WzEp6f z_g(&DxnFNK7nbCxq$+634$ciLLzSA$LF} zu47U&qr9!H&7C=sodh?XrUtW#2oK){3`k_sbpbJQEkAnhBLim+6Ov{^G}l)R7+H4& z8$nT`Xe$zx_x<^y-bg1@FI}o{#;pw7(B_TH=oM+J5m=LVm^`J>sItdH){}JJ6FszG z(x8CZlx#(MuZkWIRhoKz~dDQ*2K12`{)$fmjYG5HXWE%HMZ=!o`mwKL(UxYIf z?p|h*#09*b(~led(@;0qpZZux`;XL?pHu>AYj(p)`P+Z~@<+O@H!$f5`m1g%^V-Hv 
z0BJUmC#5>d_WGU+>IXKLGay&uDp#5R++DC{Cn?9#5K3tzb_~8#z_GLZi zK~{Uk8=;!WPng+xYAVA+@6&7u3-x*OO63A(_Y+cPce)n*8))+}^RT(Od2aNH%n?1;FM~k` z^v3Jl2%R{E|AEmOZ~=Xrzx206V@JkGaqabDaNHRVB+C(f3WetR)Xu^pG09B!DQ2#dXs_0h*9)60^HSI*{@6-}#G@wZMB{zoLo&kJ7`sDF(Hlha3k!^s?j z`U6(hg)^7;C%=mC+`e%ayq8kvp=OFI$|9#gKixac*J*)hD*u$#1a5&XQeNdL^RK{` zv=(k`sPRT$Y66oTVH#mBy3i&}N5JC$B1VWWgPA~$Yv}#~?Oskj?hd~)Qk(b0A8M_~HU$j; zOCDRilN3V+Kg7(2xh$g0}?Y6tvP?iYovx%6E5_B|(9J5Xg@ z1$HLnGv+5eyDRWjC=e_K?h~dlL*<_oCs5+&FPX7=j#UDv@`$$)Hc8Dkd$ z0->K(>teo{2>f;BJO?w&>e|wwoO8S#M@L7ZVR8x3bHE|C|r){>S^`4 zMAX;=t!L4U#U$o><2o;>X}UydGFMyCQ1G}nNx$QG?a1?ejs_G;bZfL6SJ`u7uf*2Y zt|dGKaDrX;Xv`O=?>Ouz@@E*g@J!_?v{=>~$=1~>mYM^q>{n&@C&8Yc4%-iScHr-c zDLTtZYzQ2sEL&v@EQc);M+4Wx`;WGRw^hR%sPHX?=a*> z0)-VXSLnz8B=R4~esG=a-aaMQqoSjuBjY=)i?U`)`&kpZN+bh6CN%(J_MUQT*2(%_ zkveA5G_1?h4g5d4!ta+2Xzl43M=a>5t=bo}W2~Z!zxxEf+es&6?H!m!=uT>iBmtzt1x=o|mTq|B9wn60ByAoYUuKQ!!N zo*;>%Yg*Ii@Y%V`W?{wcp9LFutJ&CbO`3XxG<4of_!ZA#7W=me1QX*A`we(K3IL$; zvz4a{yQ1ifKRoc%gCq4qr~19nv#C0egu>_v+DWANI0pJRE>hc8D)rCVU97i!75&_V zize_8H}lC`hi|^RufwUL(_ul^xUjm78}sGDwQJY(3poeF-u;y=*1+3!HBW5BTydEE zoc-Z6(E*81=7xEc-LxfvGS5-%D3k4;DqoNZrWUx#+g#0Yv9&bI5@_8!x+!z{t?bnq zd{z6DCKl%;M;jL<73^I-x-qNz9qgaj2d175vF{tg+{il+`C404uX75MjQJt$$g_7? z5U>D$loC*dU(B{-yNh@8g!pwkCNO8sONae-Ni)AnN#23*4ObQmlITHAnWrxd=fq<3 z+u6mR=QTAhrdw6ID$}{&Tdm_X*RpMj-nDaioZB2`7G<@NX!pF!yGkn6E?2BUOe8gz zZy}2dWp*_dgT?K*h3ll9lR~=ltpsL9_VX$vO%l#Af7&u3P(IK$bLR}h1l7JLQ&&nl zc~GqAr*^N(s)Y@AU{)V!>rOmz{M!KGy_-cEDWDN@T^DlbiO4FX%}eE zou$(+9GlcXFK}8Hi>we{#CoEtFv`oiMr8^U-O+mCuEYC3EV(BOl~nDU1|CoQtXQO4w`6sT9+L>K zXq{N9n!$LMrj~7I_0io-xsUtVhpq3^OyI;*?J7lrlTxmbpY88#v5VUF^)0Qmn3ty# zR*!4QyaDXXKP~0Pe!BrV$#my#W`o@eGtzribS@`Me^uxG%fFtZ?sz@dpN(jI=c1>Q zlhu>3zv@<~Mp{FTz`*tU^wNh&VA&WxJq2!p5%ovIhO}6xg zMZQtF;W~(8{1?hR1I0ZflJmAKl>fuZ_&=imkF`bQ`u|r%zo=wnX2u6XP3fN472vB_ zHnds!Rgb=J&|?*p~09#h=|hW&1> zEpqICFs?rcJF6E-X@c|klf5Ir88T#MZqC}4SWUtl|3OUnQ7He`4jB+m20sHBpK%aH z>~jBS+JpGlkCsCSBXEL#0Hgwo*=M$`K2&xC_|*9NlUYGJ5EPMk{dwyhqxA*^9{<`V zcNyv+3L;t(!vdQ}v^E4rF}N`qiUHGtim_q`!#cJa7%T|P`mSsNsP}Ai8$29ZrJl6K z3d49e%D0&E!FWOGn&dOf4kUF1M98a95^UmkVK#D#Oc3yU;iG**X)B2#3?<@zmK+0J zQ2w(FV*&HdIR^^{m2zdl(|z>?{#%kj2&gbisK~k^US6K+qU5Y*`7iS}ts~p$gctwZ_2%faU>(1|Sa;G;GlDkU5iOoqRwth8S5B9m^ZW$PbzR%a^4fg)%cB z5J(1p3zk@iT{DH0kq5oz|1`q?dyT+%3}E=6K(kh3bcVTozW%Wz|WDpW$3Z9 zAMBzn-u6?q!M=jSO@d@7TiIxys5*doln(1WG|1rDmi)%t0Ke6HjAVhJ=(mG<#_eMs zp}ki5dfO`3S;-4zNuncS=M?rG61%h2Skw*!C3E^xp2;qTk-b1_>=IFHDcaE%o@g(T zv^0KN#)!_XRGSDz*=#dVWI{VTy5PiBAc0|VqW75#*%l=~dNzXCPJKhe@l#?ETM9}F zK7g(gyKRRB*gR=?!vV_Gi3eiiK^j3k7rUNw5bD=XS(X#IIK3=cpW5hR~t zsqfKT1nbo>w-2DRz$ES@=(_Q3N^&AMd`bn<+;OWDxo!?Sx_5oXq**sPK!u_)QlEw<_cx#c3n^0fzTB7)+cW%sed7 z<+)KBnZ_ChbX!73^zS@Ivfd{&xDXp4tlS&ZKpaoY813DJpzP%t0;L4n%-9jp35JmdTW z9$N{l5D)P6@y1smDx+lrfB*jdQ&-!yo^y9V8p#$rHXPWBHQ(;-1MTV0Q{4ltgwT@X zo<9;vNHaYz4`6`hFlBWH-gIyBF2KI1Xfh3j{Zm^WH{2l%hAB6ctjI`1TSx;W+e_!u zCkc_A-}W?N99ID@@5SGbg=s-olViel>y2eFdhDA6a5tvT77t{Cq*xB9*^^4Sql>b3 zQnSEdhyRwF%nD491qzJq*qgA9SUB#S*Oj)T;vUCVy#47hkb#%^uI>Y{-r_ReY6fbf zV_4Y{Vi10_HF*QTdV-g-jE@3*wm}?P#2e^4>|S=>t-i;W1ZJouC8l&A)=kOM4$aUd z)`Jl1+;l6$m%$7*pn8A4djZRacBT1x-y>Dvdwm?A;9zpQ`L67upt`6Lw+=6DN51b$ zmIiv z2n(fC#_4UfH>N@N;-e}M?GI4hn|vT9BO`NKaO?g+6q!)9pM`xVJ;9-n2{3?hV~pIZ ztM4T}g)_992#}1;R+8Zy@XQKodlhfB8-C0>#28GCRyF3;f4n^zEI}sDb<2av`qG0c zq9D&DMpp4Z-VV8aZ(tX;^tG&OMVK!*lbw)t@%@jtXM(YOUK!h3ph98*P74)vah|39 zkGKCQM~vE%g@1^cebA|GdSIXQmYlaPL_Y(Y|Ik>P(^mZUL*kw|3b?2Dv*^A5cKhFu z+W~1fwdr2{pE2HmR2Jfy`N)5}{UDT8lU&-pl@9u0A^qp$_>bHFkJ10AuwehcGrI3| zEO6{}cCFdWeF5;CGlEWC>>cMVbz3YPtm6Ox$7XHn%4ooXLoNW+opz^lds~o_-O!(3 z)uM+OO1p*cs`Pft 
zj1=ST4{375?nwTXVpSQ794R$#jLGHcWJ7Z1R#{sG%FORuWxJ`@bvo9?GRdA>IY)I& zC;D5yFE(mqVVSw(w&�d0o4xQl_12qMxitZh@n`Y9s53Z|o=Q@r!rcT`GfOjNcE5 z&ZsoAm9x)BL`00bB;^h@PuJjA*u^XInZ7mW+ybBEDvf*Qvy-O|L~uqaHKP}#J? z+Ty%c^P#SLn~f4?KuNu)%%ZFE27s&w7IcCfjqQ!!yRj&5klc6N84r>u5p9tM`8{$F zR;6>5BX;#UL&t<;rDMbLv%(;;@wLqCx$stv;RZR-j&}L4yqsb?&yUF|y^an#;7%RU6&|vW_*`q9Gi3F1K*~! zemdRHK45cy2(}U^n9FQPqYq^+&rO_)v61J*myD>RgkwK%zx0TtXJtT(7quXZ-0`sV8%pQ~Ht&KPCTNmvV%F1!&1cy{yBQz-;Ie z#X5jDXaD3Gq)R1;KA)}_?$5a0Rq{O8s?zAJ2b(D>b+0aw5lyT=ydGv{fa$-|{jyA48jENWIZ{ zJ>BB+7YOf@CJN_sT5L4S*v{!ERC)G3)ljrrpzoOepkmaD4tCKuoWU69p&uh~a$kjv z7bklq-GgGwn3oUQZo9go1L)*g@RUp_G&cv5YWw87w%| zJl$mxoT>jhMWz{;A~^RK0l~z9=r)1>qTse(OYB#E>_aTjbEI)fZW z45&EOdLZDdgq0G(FTxfJ6MfV|OYPXvCb%QRJH5(0_0Xofj|{#>bwmEXLr0*Kco4_9 zLk@S^$x)G0%i-292!{?9e%?npk_xIM46^}M-sWw|p1nzHmtOi4Tm_$LK*DRX-AUuM zG?HNNFO(-Z%02*G9YZkpPxSo315KTDmLMov0reL$<8|FUjR>5C<6wN-waPVjrLyT0 zhN(m0=rMb&w@;;879+IaD);=tA*Ib^5G0)9AbcaM|LH8j^0!G>8Z{g@eiZ z#i_#2-l%O*5F}hKPL}dPr2@K}1YY=erGJ$)t|E~B4|*{DAk-78vZ)1xA#`UzdNWAQ zQ6c~53SmXyA1PcZ)BBno%h~Ii^hpe`jTN98!R8ioEE#oN$Bn0@krPH$MF~Z4AGNqt z0xtm7PsscaON_80>u=|sA`hoEf?)3zKTA#Uy1=dXAFXEKQxH=6!!raMUw9vc0{y@x z$9A4mK{DLu6^OE~zr0NNN5W&03-Fiip35h>LvCpB=4?woEG;L;Wx|l;vdi$MWAr}Y zHGp2F#sGKbG?w+82H9|X-7H-XfxW0|rbp1DDQgNGQLKI#CB?A5L@Vm|@`C4M1N*%^wf9A*B6hFz`* zE~ZmW#KPuf0F1a)bH%&gsf9!jXxInn@yu2A4=ny)2zyRxPdW|qutf|BK-H(eVFKGh zXW5#sjKm~Fn9a6ABW*4cT;#$EVambb*wArwSeIm!%)wia1td{hYaq!>GN|3#XV#^woEaS7{b zE$`k9g?DUAN~l%q`Y|Ec9tz|gX*+{(U@4tFtWPQvO^W5*!TZckCBRwdV1P25&EdQz zNI*-e-GN7Ij#JIF6#Zx*RH<}Dj)WaJLThxahj>t$kd3WGrqPMGGY9jM zrs@YMBj%AtQVW3JTP>yNgA|Ycoj)b&GV6NUgW((M&G{Ts0$OBAN8A^bG2YK6b904z zs0n&!uRb9|mpS?rc4OuLG##luAUO|GsJ_64Ilv!PLs&G1pA~90U0Q8M=wU*7@d?qN z>BVQMD!S54wvg>0?PX{B?s56&JC((V-=GDteKFqW9R#BOVeBXNLkVn??#xbQKeK>9 z<$@*ApyL`JNhVpRKp9q$6z&A#ub@($jX)C^0DK-5gwP&;_#iYEULmL$J817=$NEcZ zX$A#I6g>M#dCvzD1#KHXG;sjJN;rn+{=Zyiv z7Abz@i_S$vN7ozdQM&jC{X!rh2;MW%8P(U-d4p{BJ}*#NuHE^Ub{L1&vuY_(V+#O3 z=I;}yLxKpMP`!cw%@zK5orV}VlP0{eIB3RN?(+CI zvkY+Dk17Zstl}Wq`gG^yDFQV?3d4Ap*QHjddPCWdxNe(Z)guzJjmnz=klVlgM@E41tcJB7ewzpwxdUg8Z*U@8%k1-nVGK`3~upsFtVt(nGSj4$5) zB;rX3XXWA;K(I0X^l3IYP%H1=y=$+^1&6Ib-k5TAPJ5IAp$QBefO`Q+QQH-!lr~*n@QvGh z!a&veNo!#_xh@T-POQM>gF#|=@^ENb#3lY*X3qcIL}-R!Am8O}0zQ*N;OI3JcjC|= zR*=9^doKa4849FTZs#|``Y(Yf@=-yeB>V#jgdh@}O$RYmaS0%Fj~;L!e8Lyt@z@(` zyNLYv`^9%=P-_4}7=f^M)B2JE&(?em+djfZDEoUCI8BQHVOFoV`eR8Bjj1Ajf78!j zLN7q_$866|M;@I-*KEQ}ctf_q*-BRWCfqoB*FZB`uqZo}tRDS*?sw@h1lUQNnRv%z zDo(GeFOTPwHm{owma!k8ePb zRRZ570`&gC|LZF_3!x+Yw6Aa9MlMHLvJxodm$o*Hn|>r_?%j{8ZXCKWx?Uj>RJm+u znP0lrNXvSsriD+m|m{kpOcXgdoSyi#!1k{VrS; z-%;wzx>lLRj*WuKiw3z6yi=afTspML(=w!eH?82u?i1P(p@`%oOk8)_;+|NDovZYi`Y>5%R5hG`wS zN^yD@#kn4@$!WB)S#JGgL&N3ePWvI4SvRJ8)qS!sGD;L}F9fJy0WhISEE-qnD5&lo zPRB15_wlH9zQm5t#^S`0I=8zbzOXwDp0}$A~!V z2LhU4W_M9lbyN%^ugjcvbFils&D_m>+0lqqKKh}@#A7_?t~DO{DaWL$Zl4R2>&Vx5 zlLo1w@#cfEE;HX-2U}C-R$7e$>H1MeoJb^#L>ej13BMp6m2$AF>UQZA4PhlNwh zM=co3wymT&kB9r2j?$=B;=5N2>zwgjT6bPm_6r(n9Vz+Hi86WZApXQm#BLM}OCg}t(;{K`SQG6oRZ=uR`?OrFv1IAqMUR>H&NoI8GbpdMg6g@> zIhoapJqnV!)tDE3zIkRBg9CT#D0H+W(j`^4@OP>CrwaZPTx(NunD1n;rNql@74`O% zM4^eEEQ}>6@DUY98b~Ua~Tmiw45-XC~1v@6C45bxC=O z_YEJK$ZBmcaG&sL9r_fQ_0pzja_GxRi>n!VnrjaHhOT$?oXqVDUApfs>SxrM)Gpq} zp{4HJDw?gTb@6b&{dutOov3xNc9wai(#{lCQL~0mo+Yz)lPc1J3&n>b&2jR95KoM% z_Mqn!!j|)TL-s40%#VCo*cnFxn-7m*U;}0F&zh#yjC;QY^%!l*o6#h#zLUio6wR=!deG7*flL@|v1z5;OfVi6p zN-Bl~sBEQ$&N2H#O{(NmUelhm%QNg;i!nhlD!p$nAv^vW+ILd*ifEXeo8!=jf}x0{ zgt68Bf9^%~&9HL4m{=Lu({H-^s&KwqZTr_@>uM_wRXO~P?pDL)@(XrdxAW$2%i9#o z$calkV;w!lGX~}D1}3Y`x>Zq8URbGhSznlH9M0V5Yv(oAykyM>i}R#V3g|e%?R@vH 
z!AEkfg)E!^!F}{Q`M>=Q)84KHkkTT?<=(QTPi2Dq=Ruwen-<(ExbvWa^Tcw|dCr1W zRa4X@Cn0HPv2^BvV9&N#p@4U2ob2k*va%kFq6Ux7`D^3NdeS;+uPm+i4s$u$#jbcP z)N7j5iu`}aBP$_&ZMb}9`*B|Dt5H`$JrS@s^s zO0pgM7{~fucXjkUPdVQA_xIah_ebZxuj~3=-#zsM3?Q32+TNtuuUo10#BJQ@SA6}vSlQWuVA`^Ec`uqD|tK2t~#TIZg z$wnW$Kb9O}YO!(iox}1>XRdkrr@fYA>xS_1ZqeZHx7*(xQt5theIbIwXTv#h-ETo^ z9BzsT@uq)weuyudF}3;q%mCxwQN`<{yT4$((TSSO-kz9)31B-TFkq1AZ}$EA^Y-OA zAhbQ5(M&DQ;n;lxy=<}u-`VaFC9+#7vv^G|qs*sbDkw>Jk!!&_e8N(rwat1yvh>@K z|IzfhBA0G${pGubO%~Y5uT3cyxx!3AIQje~^c>%m*j#y4brOA_Y9^oU;XEtOw9g8s z8CTR#wtE~AF#L|S$6gmI16&eo{LWCzNyPTN?RdV#p+E~n&KwZYcqcjeF%e}ehq=_8Lk z%jr3i4?df8xXhWpL|3JTomPBS{x*iW#@oKS)2^J~lS_T}e57V9ccHO@2DV)WHGvv! zpwJtzK~HhEcHT_6=_;s&>!V1U9;SQ2!_&IYs_wi>ZRQi-p&)k!x^&v;>Lk2L$M!tZUuE)~+on7bo#*U7| z`V}v~tf~;oRg{PF6kD_OhGdZ8q~s*3;OKWI(rbO@u7)i18Gh@nob*63<(}jbZti*O zuXQgdN4L8V?S5YbHC1|>xdJs+;JfF(4b2&crMO++-kH`vPvI$YSk;+hu6Q6L^xekq z;?+yks-hOpVvEA9v^mqJ-}KzflnU>|e#TgSe!@)aT%SV{te5bq%hq9S++u@=J7>K+ zXEANV6B3Y9c{I;u#`)@z_9t4i>m;;mS5}{?W9XhaZ;Z&UthZZjyp-9nz7Rdt9h%;R zW;i!7vQ+ogjDInjF@HhgZg##)-4SQqdl?JSSK5QPZ|q1gbNzUIA+XG|a-F43eJ~_Q zLg5-u?iz0Q`PN5mq}QAV@*Td<_N0lD!D9TXuJM&sGXMGzBJJMc*2gk>RFG z$zOgD8Tt}!n>EJOZsEF6S@YT{E{doj0jyfhv*MTU*4jcBRz;eikjj<`@&A`cJ87(H zv7M80zRX3eE>0D_oyL)&Sxt?O+FB0F6OL^88JO20-)~}54o-fVV8JfD%;rzA>va^J zlNObUQlry0dZB9480E=5Rp{Ewxa-~d{E_iycjI?7q#;(`=ev3WO0MbGGlx=f+O$}9 z$ttlS7i#xw*Q36_(NFfaQCYu=>QYc04^~fkKBE%WJ*d&VS0!$vX*uyv5=Iq6XDvCJ;Uy66Iq>1mNi?Pkdrc1&S#G&zVz z!LF0mE!D>LFh!`;z&Loar8#GpuLxA`m%y(2cB)SbFRrWJ`E*6VwHK3PCJ=6xTJe;x z|J9s+ul-PUzT|s*Ck>aml-Z}}EsL7i^8;+ZqURz*=WOpwPfZnIee`NlBR($L zSijWbvywxkn-yg$-V?a<%ob%b*t_x7cjNh51*SK-IwPdzGb6PZ{HU_0tHjE+)rG8t zS;{6V6{d|9)in;&!5+u)$F46*>&0UO*($sSHkiW0T|aeLw|vbX-w|R8@|y=aDY6)~!9~YkX_py{JIC~#W}CK|b9Q~1o_s#38i{^tYIb1Q z`-`i$M~Th;9mIE`*rRCf4g!V`4l?POB+9jF&wcIn_pzQSzwv@K5aBly=;sl znYNmH-M!{DA{Ucn7gHHgFUG*(GLtale!B4O^2mUXVP)Qm3zy?<^1;aab*9bQUW}FI zjocc3-aRsv$D=h9Y$Yu#ZKdX-yqmaHvqM*WCX%HXG2dDuN=|%xyH@|wOsFHR$85uK zJhNl9;W(zZFVAvC*s`zrqwYuR3nC{h0`#-JA^R~!?m`;PVN+A%U2&n_y&pAuM(1em zRZ*oyEjI0U_7R)AKP3FA+yBw+T$Ec>{iMr$o$%7c8oE}{o(l*t`#EX4C?aJM`so>C zAA_k;PB-lA(G9Ppygf{JeDr$C?^J=)SYvNt0@sB+Xp#z2| zUBVV`p%Iue8SfV%>gE%+Y>Li{Hyk3n#x-DW#l?UhP_5DjZryv<4b}%a+RbBk5epvY zU2W0j8yww*N;=I*k<*FmdqJh#$9qUkr{6qCSXRsd0|xZ@C*1 zM@L6K4f^9AOjtJ6VN5tCgcwtDN;|mF7#cqugKP2m*S6!*DAg!^=8YAc4BzlOtBM+G zgRBq>`jL9W-q%zOnN+LaOnJ)ANtwS3twf0Wc&}wk3a<;vGm~hLgxrpJl*_XG(8JFyP5T2!b1HX*z0+t3<^F1M(IN&#YTinayA_g*+LB zx=$YzYX+>E0VruzwAsGPCPjObO~l4Ny{r6uEsHVe{2_D1vSeRO z9tDXi%KW1vx>6&*F<)Rp93|R>lDFxR#3mXp1K$b0b z!_Mp(B5hn2b-HlY-H+cnf4+$B8%oOQke>8Zl>QO9<;I|(lRHFBLNQ8N%Q*G&pk3hs z1yK>xD&!uF`PI67M#&d2c`g=1j%gUVbEDHnMG=c2Y+{yW*emWXs$nK+9UyF9wlpC~ zEpFpF0h}RuPRdQ#(L$O-UDL%jbIAoZdNLLKnbzOG(q?NkC8uPNsFS-S6_(Hkr7ELu zPP~qzzEa9xT?y2_;PVOsj7gnskVX5>uA|)90di%)i~;^ikH-<{(E-@|8B-$DAuHy} zm1YC#8&Ow*5iEnqeCA3|ZeqG?!rCZ*6(-Rv?(q>T2&@ zws&udJ*9QRBi$3jwb@!nVze#w7V5Rt9^@{q8E4<|QQ({y z0sbnw6`#I@=5^MMv6+bisdskwSJx2gAKMS@j3gHgvR*5@mg22O?!GoVEi)$}EozmN z-DfCuJvua#TQuvuV=abyjjQ5@7_vDxETb={L$`G5uB2p!@kWkSm072xwqsQ|NKC}3 zi7n2irww#)bzyxUIqENp9UlrFZ`S-su1@~ejL+d(Z@Oe~~~$|2`Mn}G#O+~;&p zP5$=%$K-bc+UGxZlt3q9Nx*5-LmOGNtmAj*lb+`+K(W$JTcD0KwSyx1wts;gqy6ibH27b1g zCZ?|ES0=ArNGe%qOazKX*=vmV*&9-)qR(e%($L5t(n@;bn(EYqLT5V7b#vE>fTw1r zRI!k5v?!OWXRgGw-mp&uueYG0r#q^;drHU)(B?PBt(Ps^E5sfTeHkA|u(DP@w+v@G z9;Alu8CwaNqK(mVanf*Hv6xL3x2s9fE6qGNI={;l?Hc~#wc!NlCU1)iXKkXq5>t* z=VjOD$O;sxwQK$H_QOFuhgQ2Urj?b*l+9KrALej29_-M;Ql+D4ZD@;^5$Y%ke) z`*NSpyVOPVP+F=dGqZ#-kST9IE)b}*0|Rmr-oPVsz#@OmVF69kn>Q4BR6jTBsK|t9 zkJSX3R9P*T(2%nWFt%QMPjQHym@g2a}d 
z@yoI79?{XpKwu4iu-p**XpvG>^)<(60*;Ap=uGoGcr}!n!r8pP!=igqbHQ@Db7j!I zq=HhEy=hV@Zf)XYc*WFIPIOP8>*}&uTDO9>`J#(MbfHy8!^DcZ%N%NEj)b)$dLL4a@U@u3qPj(445jXyL)x0W>yza(OPu4v3=~Z2`$_^5gPQ=ZVZz z58xx;(%*Q-k!UX2^;L(IWN30swgg7L~V!J}2iU_R-MbO4S!J}Yxsg446-PgW)aMCUt$|p;=dPdxUdpzksMm&+KK_+c zb-fPV(~5e-UQh3qnrt{NycS+4If=&X&KmMCtvp?+aq#gFJq@dtleLCxduFGE0`COM ztEU3nyVPm0$c}IzCn~bMs;W0Dk#B@R@oD_5>!>;?>DKTTa(SAz? zWgHbOA=EcK;I5)RP{)>SlSf%OVWR+SAVbl6Sc3x4z7jSH2}X-}sjUr0#4N>IaW`zH zm-6QXeoWOHTf;`+-4aH~te<6L(Q-5>x_NBzT4^2V)Be=Ut00?cKcoAnb6R$Df%iWz zcj_o~U3RrwX|+c&dWj<0nkF^l#IdUf)4D9XY*ZD=^|&%e_=AX=Tj;{8y4W}tpJttp zTFerE;G2JPxhd_*=);o2j0jf7HuK7)v)S)W%@}x!X8Ljl-WO$$OJKggEm4x;%CjCy zmx`94aSZ>czL00l*>B-$t7u@D2>E(?!XK%dCg#YBNnjdpG$f_|u&ylUQ~0^#XeH>_ zE(erYn_&Xl1@C(6+%sJ<7vR5&akxC>Bg&i(7h7p+nJ|n?mVCF~9_ZN69r}jRb`oLP z_1Lx6#Rd2jFN=)LcgZN zEQ`IkC0YZAZw=%J>v67jPOJ1O3RlrHl+yR>{B)mmObsase5dmt0mG>{;K_Mp*OZg0 zle4>=PuchxAQ47ZuyQH23`KS;ms~yh4pM(ku59w;^4&wj1%w%9%{jm(zJ+{pN7vXVL0)g&bDhdKT((kFT)3oUcB~ z?REE%`Z}&+lgr z*5kZhIRdcWB z{w~Hqzz&DsZ1{@FB))RE;MiryJV~SFx(bxqkZYcU4x;8f8wuNn0wB zxlFE=NoFlFJ}vr;sunZ>MJuk;Xs94l>pl^YVu8&zsSl9E65wfc*2Lk4T}dox~10^!cnjk_$Da(5Yc z+uX1!`pQ~lp8fcCzbdnnumI%ty!SQXer1w6OCODX{nYZ_^F`?$DqX62pQ zysT((kG6lN z$#AK?`^Gt>ZZb za+(}pDulSqwP^n zr}m-i{iOZG#y4d8f|;ErdjCYMxQ~zZOKshU9JQG6W~LV_>-Kh#CuksUIJ^e6;WJ=S z`^2m1H3Gz7F6S@T9fjzsqREq+5%LH^Zc!A~`%Igq2^qx!$&`$Wit&5?j05iNVLs!N zMs*X)SV+~3yAn{B-<0Olr#8^lGb!EL)(FA@cTCjv%jN8fs?3bLfwlc2%E+QNFlpeX~R>Y)Lpl%gzaKDs5+-W`m3f)YJiE&tw>f)M`lDxVt zl_CQ`Mj*mupYYV8;a%J2vBaoiw1>9F`MDWaEqiV#5*BcGZNX3|vuiMpo`K!QJD|_$ z)(s=3fY9DXN6_-)Tvxh6jHwps7^s_0Fb~k`-%LyS?*k&32Iw@VZS!(^P1u71#}jGt z)m;5P3GPWa12Pp=;h&TDcgW@>$Bf`hUM6Bf#o8n70)4d9K42D zotI8ZOXCX;3E6dzdnci;dku8J1}YrM4Io0~(Lpf~%W^KdO29vJ6+L;lay9OyR%Gi> zn;b2}X&+^vaCLc|XE)B{c2kPVTd3DXyxDocDJe zD5{Y6VCbeB?8+Xe*)Oa!IQ5ir&ke`p(M6)-P-Kx4RYs@RSzcrmkiehR;waGPNp&?j zh|z2+$(2U`DI+|(kqW3v7NiP9rVS^o=7*FQ{|p2E{!8p(NCNfduSIyUbzg=;LC7r9 zbwNy8$693Q$19;?dQSI%!voE(AV_KgCWRza98x;}U);c_RS_XXKtXkUv%AVU@Td5j zIv{&vrrwbG#T7aYPGv1~KilS_yR$d>7FHWO7gI#~^Hj~;>ut;8vm9yzVB%1EMk#y3 z);;j#S0KH+sDd3dGJ>^Ih%HU6=(jxC|N5e8944nf*|MZCrkugb$=15_v}-WU!4z=; z^>J}urWa&G>c}xu6(WQh$eD^CY=v*#Ndg2@K_-D=66g*?govl@7>EoV08Y%ITG#L1 z5hn`JXuL{3+yhBwzh>z z^#xI%7eJ{vELSu=)d3vq>ES7;S)p$rYLNT_O?v_9542trdz)5#?(d)6^`Eg+qzRde zL;rru=7M%R;SC5$fpD3#505%YF4sLZAD`4-wx_WUMU~;E9uKj2{4-jyw)`J@Y?!B;OzY4w7ILO@^w*$19OzYF)=+kpqV~DMP!eBb3&5f zxpP;otgM0zb%nP!jCK%1E%MmCB0v|!UkKzubEbR0U{4zwt@gc#n;=)Z4G7OKlgR)- zLLk;2@Wo(lJ+iICOJmYH#U!dH?HFiBe`iy)JOjw$oS4qOHR54U$hQ%-iBi{l^y}OC z01x#$Z0Li|7Kb!tRy>=5T?R2h%FT29`CxJ#N@fFz12>2`3YzLv##8un{Kb9`rD^ag zes1aboeY0VLtB#;;D_uGvIPsS`NlutH^xI4(X>0meTG$2k`Ji)5xKd!XBj~!Q~VD9 zd?+T`{Z`OTa4y$pV9hvv7xKoXP0%R?L$bl+f@jSw<7R$l{0e{jmN;k|G`2s$a}#{h zAPW-Cetivpr+>m?H)SNidDz%d{4@~xN^qGr`AgDUmkzYN0@p;v6|I2(TDO+sXOL-F8@zr!e{Pyn=65p1G162qLPKXVLE<{=#VRvd%8f`ahw+`4`wtNXnG)Z_ZZ(31FZGqfk&le`~lS z79f^TFgKkTD7bdebn{&D!w@9biOy_I;qoWJPXs@{s>1J33G~QQ&P)ni-n{-lug>}u zAOU3=8JTyMC7m=S~f@tAQ8r_df)`o-2@@%(9_e;u5!~m!V`hpOif^O zAeI}9j+pJGD%=)JEdzqLuBHZILueXYkyP#4!z-bOJiw66jP~ z?Yrs7lRpPI+s9i+H-}{Y=?X#KASMA!8+akp*4ZCdE5+;L$%J&L8mMzXsuV8~*lpCI z!=N!Ba0p&;_}$_T3)-^5na2qgE{Ox%wYAwK0Z)Ov#Q?QP@`;{qO~?Dr0r@!sweF%$)!1x`C$EOM zA{2zmU>w_|=|9oT4WucPrdCm!Z%TE99NC21;B`DJ`IB+%izNfQQB%FLS%(=9O?D=y zt9^^7jKACt8p;hnvkBUrmjMpLI;b{xyGa|H5Au45k6`GO#G+-sq?zrD>W0l`aKAk) ze*^f>#bHtqJH?BMgnzQrxfEWNn(WngHpxgQ0{qwnBwEBT(4KY0vmif6UW}FmL1xG1 zLH_*!3xjo%s2a};S!{gf-vnLD>mN=G;H9ro$_La8aauiqd{;!C$`iQJWAe4cBnN&d zZr}2&>t4)X(ZJ1e75voP0GJSHnrojDvZmqc?jX~c-qW1l&g@Vkejf23SOs3lJ8<|F 
zQCq5R>4&JO(-d4!;41a+KtovQt`Q;!5A&J(+?bZg)&vS^$2Yh4HZ1Q6G+;xhL8dwf zS&9NtLTZ%ezJF}%psM4NG{z%u?2)P7i1HEHI>#kx$Q6)$ZcF%sW9$bfZ??^%I{aPn4Mf3msDYB4^+~?TpVrcV( z3%t+g$=KEyFL;{w*p9?gsyP&GZ>yu*XcF}{QAOh5HL3m45xbgA*J+0i6wlt@#FRfD zT3!~rfz$~dFrTN}X09M;U{`a1(w#}T_IvEA4h%qeIrN!fdE#hz3sufV z{JycAI*SPoTF#l9%_08Sf{7xeW52-Gc1gzH)P!%|nRy_ptS87HUaYIgUK!A31ZbfZMU9eE)`s02sv8DB`4dVy6r(mi~_X7f7a z9Ux=zIb`Fs&p=uI>nk^x;^&7P2sCPr6+?JX=};X#$El{K#;?bSiI%s2Yk=X$dFh5urA8vSI>v_Bg2fEnLM9KhlZv^rlw=jE>I<|HT{;L>Oo{c!Sa_^x!V^C9c zX(?Voxk~KMvu*wb6j6XfcK4(_*#9K8YUbKueD!@pCQJpT4Y~J*)Lmqs5V$JRv8%`v zB&r}Cil$^Nc5M!4s0aLvV+IHsyby{#ivVC^V0+@%1?jW*ah4CcY>CQbR!l^~{a+kNf%K4up**G99dGF%Z+ikrz3)r{?8>gxH`4u?arOGL_Yhotr;u&O2FG5xaG z^!M+SmvB=+&!oNb=8+1JtO1%S9!(EGp5j(F0ZXE2Q6fq(OH!rH$jT(RS~5?>&URweN$FGwp&eZiHLDu~j%K zScN`9$$4CJ~Hg2@m>U$Xa5G2<^4+#VHl+neYJpt{)qJ&q`{zBvp zj)>r&RHQ(}ky%kuahU(kwR3JdhKaqByn}h^2T1s%C^I7qo9a%QC2^@Xh7eu1u;U#y z;*tWD78a~;t#9&y;5LzZ=FT z6yk8AF=kAoMFpb?5q<9BNj!_U+2O@J*dW~43#;697tV5FP4UIJjcy|U1~^+8mJqKW zCl)+{H?ypBY-?+4gNUx7_j43@^e}P=U3;@$#HxKZ;6aSP3B~06yytw~BVuDdM|z!+ zF^ZwECuT!h)^);*P0`nM-+HT!ageQgm}z2dTFKh)gw^Y*JJgAWsF1TlRNfRYT%M8a zV&3$wsa`fMi3w+7WHj=wV@I#v@QvL$28+eOrusEQ4y|kk;6#V z4VLTeF)j*Uvm*R)(yp$N{@(PQ3sbN3T!%CWTs#bUh|GlF%@4H#+zU2`UK&x{i5j=M zblaJV)X%}rug>-r+g;%}6wjjKF}7^lqatU9WV(}yuZUA+z7?2m^u4cO*Z*32f+NXJ zuH>9%t7=uD1MbHBkliK2q9s&3A6Xw1$e8atqFI}uurf9(Vhh)2TVan8T8G*~3qQAJysZnEXg$7|;1u-LoueEIrI)*h*Tb%9jgGX0O|B5jVo0V zT{3sgt%u2lKhAQrDm@wJ(+ikiwKI2f_)1f|+v)-@Xx41Q)3D%}$Wv4#jYXw$G5Dj- z6s(Ddy0phktD8F)>@l%juyE@oRY#GZi3xUmaN}N&WDe1vvh4R?-l`(GiB+8iO;=8w z%MK57+0T*F+z!A0Qxk}lMMzTy!DAwCZ>@uZ_&YQctV)9}ok0BeA3PXI4ni#PIk*Hc$)vv(ZfRGgV@qQKPg#ec|(+By$g2cI^+Jmxc zLmpAeTn55Ve|eh4IZ!N{Pzq6WRdZRcdh>OjZRVo(GF`Mv@G?X2tl{F`_EgD?H# z9+ANi!!*vU@gqWf1Pl3jp<&y;6Ayfc@ea#3K%8(L3L5d;ULz!Iq2&;0@Wc2dtP*J6 zcN#ph?~gS9`;PFK1>&t48|#m2~_32)r>B4&K4-QO*V^n+?{nNeS=Vc5e@$tCv9w zgLj}2m2j?rFWcVe4yacSuLB8~m-q$3)_f6yq0S$$W|sog`k<}k1_0bK2wT-C6$zC8 z{6th*D1Ygbr#=YNP$t4IUAlzYb5=-5N?(7VShi+lJB)CHl>@?u*C+&nesF$}SKgRI zPhz{bkh1`a>=rj`o)aO+ApdY^xu&Ld-UF+@KA-a{;_V z`T6Nj@Dex_9L&pr4^kN6K(ps?fEOE`V}#l`|8OC+ctFUV7zyfX&*Xx~FV_im75hm~ zCbD2dL;gw^z}}?6qtYAPwpft~CLq93rU}4Biv#^2Tcu#x+oHJ$l>PDeiZbY4TL7?k zEHnf~_(lN}`W+fWFhLajytjO5NPTM-O{I5KvY+2Eh#CwyVfzfKxRpai4-Ae>3Jkw({>?Isl|= zwGyxco@7=4Je!XVRRnyNKXKR@TC?ObS~y4-JT6)We2*S`FJT2i zwS3FlJuaDb zoV;zhKM9bBg2fa%py?`38yNv|Cw6dPp0vWp&4?1R7qLZ8_HTz@L8~3n+B-<8BDyg>^U=Q*19NhqRK@cGLu+JiQtQ=6!_Ru&|>RLdp-{W&m3mxB1%WALy7_@dZ0VGwi!#C02 zughbPj!i}cn0Ur+GgwP#K*JAEyww65n2!eGFwlz5L;Ptfm?zpV2(~a?E+oF)7UcB+ z6c0Qe1H+NW_MI6zT%t#;3SlG}g$}$D7jk+_{__i`&Ol0b+$k)`rH1dJv7*%Nm%Tmu zyu?~bSDk~>C~A>WQWfrl*j@#ffW$SYfL^ zKh$GKhFFvFQpatgg!_vL-}{$7sX4H>5Gs%QaTOj=SB>5(&{(hq2~z7kg@6#pHWJ&kOcY_=VRiH^yZpW2D<^}YDfpQgqFyVX73JMM~ zas^6^8dWA}2!LS!qwRPh5!6``LWUFkQWS?KZBM%mHg3;8@*vce{S0Jnix;e)?@15s ziRztFDg4tgfCH8gUohW3mteL8VVlTeoKkB^#YfHy1XRPBK+ zbOZ8q#U2Gk9w!-Z#cl`V9mH5D;k|D>#Q{uZAqC8)VdgOt<2LjK+Q|cEYrhu&@YPBA z8fb9j{O7-jgZ}^v>KqkD2EI+R+d3PX!zm7&d)vGQ19_wVhET^`GU6W0qrs&MK9t*$ z|FH&ON)O%!tHX8?hyHwtV%wKq0mEU?{{QW&0{|_b!|5y;Z((R}Lzq0}m1{pj3j$m{1f8Y?f!z2Ge zyRC2x#sdKAuxFS+?$*DM24MV7{I0h8=FjUr)*u6TpH@jOp)|)|KmEZ#0jU4A)ISdN zUk3Ysp#C>b{vW9Sjg$WitS;WrpOQtMOsZPlQRMdgdC!@bCo&FSQ3^It(m zK;1gq8>quxU>CCpn)S7NOr>Y8Q}H(Tu^8VQo!SlcJl`5nUO-9)7o!pqOr_IrVRnUK zSEn;>Im3@qhQua2VEwv1FQl{GRJvY#Z+=%_rqlDVuue9RG?HR+$yqV<=;D4OWkkk$ z3~F-x$QkzaNj3-gQ6l!NgyE@_nMcK~ZRdLKD~!^cl+Xk|_@H#_%*O<4H$_xH!U zyp`Pynq_rzz@0G887K2?qX8WNqo+_v{Jw=*%f-WgO$@Bwt;rQ}#hk}a+#_!TQ+aKya zSF=?OiA^z)oZy)>@wI&eD!IJPS{MA#?E=p~q{OYaF^q?)$r`p71nu1cUlBdw6%!`P 
zbdFtKslM+#c9l`nOdIRO96VpJfE(Yi=ugoS@29@dZrkOx;$ZOU(P82BrP+2VY_a_=C~dkC+s3^7ggCF$VUn z@gvKt!>p^X(sLL-deTW**e$+Qk-~jF>+GgB%08cY=ZhlGO@SIXW>@S%ve0-8ICa8! zr;6SIRSxazSW3nlb}{}?ub_kH%`5CXoJ^(mCdwi6@aPL zhhwf{;h3qB7y0DQemg^@UBM_FaqL7|WzJOp-KorS7na?!H)$N=K{m+fg5YT1zK9Jb zHY$BDa)Z{n0*5MB(p*B>7qFw`8Ja{1(Z$!uHIvk^E%V+uqH{vRhdIJ}jrAHIk=>O! zmX2+6ca34>h%Znm8f9MyU6<_Y(X`TDs}2b8lbaU?0oE>lb#rUPqO69$rt~!rFt4@`y;Lr(KFQ?6|6}?>aUv=;MpwSCv~i>k+r! z8*XMdc>q-D&@4yhsDu|k&3kf?OCncHVttZ&E!+L5qGkH3(8W9ON5##?iJC+3FApI8Odwq-=<2)c6Q5E`=w zL+!Q(8Ico@=k!KQ-b6e?-~EulC^b5Bm6pWdv=qBPR-FB--y_v>jyZ3;%La`fs-kW@ zP*O+>Z#p^bQTY`v3hcC0lTxP9paD#VKQ2Bi>%~Rp)}0b#N19@Xli}Dz8SPfGlVeA! z^tD6RBDKmHsaaR9WI5RM+sUGX(wYR#M{MV7<%)*A=wzjfrcC#c#PG_CmtK#Xa3FoF zC|QnFz-`bd$Dckc-Z`H!JMK5c?G=Mw3{yTQPiPc3VidYu(-LY_b25`BolobWN#1j` z5KJe(SY@Wsmo$ew`({g}652IQBe_ZK*Vqw+UAyAijqwE{k!P*Mjshh z*-0%aoW%>$b@);hce8lWonS0*lg1?udp~oxlqh(thuaw4=^ zqEe(kbxOjh>xnhBu%3(Qx9N%F1rwDS%*7vD-)vPuW2QzqiuA5@PRO3W@m9@?oDD8s z@kZn-Pg+x6Ma!Lll0&!`N+w<8H6e7b|1tgUZ_HbX#t22EZ25LshWmwTW|cY9Aqj*7 zMGFUm@>HH$lH{ik${~E%1ILa%CoK44&osE~NX)3Niig{xK7Pd;`vS+0Jr_I}k0TIv zG15guQ|m6y}kZNpFYBW{BOs( zp@K99>1!I?%89D`+1&h+5eUT0V^!nIg!#cuM~u;Pn2XUF2OO>{?DUh?eGb$FtT`io zL^R0nn}%BoxqnbWlX1AGR&k@W1Unxn?ENWg8rX>ah*gazE~mX>Y^S=knR^uQk`oxP#8M{{QR;G~8~0<9yV#0~_G;w<2M;NQ11Qnrez z>E>Q`(!E$lQgk?x8{MS(sLHh$G zCH3mw1&ykMVA$4F7JLKAMxuK6YKh9ttUGVq|ghNyJ*Xxnrsoc&e(gv5|eJZiO7G$E7_qfZtr z;Ff7{vy4h8aI?x4p?oNnLYbpJ42{3uzZ^IEOvAe!i3urTddt5}dLt7s?^F;wf5v+Y zB^f|X2S;2>SNvf9>%1YH8V_3oFke+Uz2*ssy8jJ{f9RkF*O0lyV5qRb+1kw@Q_Y9s z68H3_`#uC`tVd2bACCKDTuE+hII9{_Rk)zOYLq zs%%cJ&CLP|4xoTcN9SvtYu8%5tYZApUz!zc4o_AeA2s#4+LoGxomU{xp0jwKl%BpA z&^k>CdjUCfv00)0W%Q2U(}x?BiK9^3w7Xc`T9p;=@&4sjf9@3FO=kp#L;cXYrpZ$~^$COxjr0+>sdVn9caHR!gApNun{iTvG*swzn{La4 z>>$RRXhJudDCFzx`QgoFKc#5>`c*zj%6fyUG)d`-%8-yyx zYy70~1dzl72%zlU`6S7`C;zh6=50=A6i<+)GDsTj$D^}|oKA24^M8o8zr5bS0(^sJ z{I8$lV<1L*5V=+-v(WLUrQxRt*-LS|Yh;N`TY<&#TXv3c0<*R**dk(UEU|V2=i7DT z<8k~hgQa!)OQPAO_y}MJY4k~6(H)6LEncb?IEdJoZcZ+1hzHwG60sU1z*l0ALGLD{ zoIA3)LMZ!Bn1+ft3=nwJNMOf~5MwQWxyKfJY_hNz=40`qlaVpu_~ zk7j4NXGAs%B~b2#?*Y^BiCFWrD~SPr(BZ>}M|7wz5y;{&+J$&Zn)=f<5MWP5y*KM| zINv7n-nexP7)g)`)DsF#k@ZnC-fuZj$^_XH%SU6wOffC@PgY? 
z`)$i6j&6-fNM|wwF1NwMB@3ELSMkx2*i*olqPe-LKOZt8V3$BHp!YG5LtHs{=f*FP zBW8e?DRD0WK-vOlkxAukL43Iv>>!A*3HC>xA;2AV&;ZGrSz0#3jKxYI*CzVI_sxpa zKj{Rae9-6`5l48vl$@Mgkk`rG>Sl4Vcr%Sx91&Jn_?eo@&xW6u7qB7&2S(Ga8({!h zd-?Ov3bBHr` z9A=G&__2m=V&g?fAZc~pJLll&7=kh)nA;S18L7J?@gk#X+vV>2kFVpYV&8~7qWh~( z;$eD^E25LC!VVw?#hAB`65qLHlU;2-G+c!^(Al7;c#Z3S%z?guTVOdjyBR`;LDOl+ z2sa(S4f&zqF4TAeu|^i>+wUL!Eqgcj_4@%y1r|c<9H7Dvdu&huTjmnf^w^rs?F&=| z*>#)qF83KF0&bySAWk5PpmP;$-{38jH1rGM^})KFi{Qe;lv42ZNp;32`w}mYYle`Opf&gz(`(bNqII4Zd6Fri#2H{ttiI z9}CD8Gx_jE9-mW=JPA%Dq8CiHbxRdsFae$d9HS#sQ$F*36W_`kyLx)q3M)1lCokA+ zb?{%<5&p)EPD1FOmzP)dc_$8cB3+Vd&O+ zup6wIqUIqH2gU)YfQmbT$-(wya8vq^Fd&j|p^$qiDKE45jxrFPB!Ch|P$Vhqo$_P+ z+h>7WU{a;L3h4hDR+01s0Eq^0GqGy+z;6>t4nD=6+KsO*{tGk!lBORhuHx2WD2VVz z8nX$O<0bBHrPClzOd9|iVE*O3pHNi0rc*(p`sD)=iS; z2jF2J!x%28?ouVhs%!EfRt;DWZv$d)dM>U@;Ng#SqA9&M1 zsDu{J;Epvw4Oc2090}xZ09$6aPVJeLy3ISP0PEhS`I9a;|6&G&X6sd7<1@ehg762~ zswOXt6}A?>ZpAHN^U*`!e@|t10A$`OqaeBY**_m{f=EXzsV|b07|bEhv8WBs8C>WZ zNCwKxGiK+ry)(G>%AMmWlI9W5nCwp#E>>$_HCYT3Dqz#f|HATP&^9s%tQu^5j!@zZ z?@a(BV`39ox8j=JtAp>KeS5a-M7jQr&Js%{^j7?ody4FO3EelY6c?NFXUrM*ZohN> za@xIp$j+v`h}rR8V)_DFKrvPe0w2-=d}^#VV@qV;z0XJu{3JS*-hKaw7=kt2i9dN| zfO!B_v^sdr%|y)IwdM}_@kHr%!=i=EoM{h^t6F)}u@T<{)y+HYkv?B0BUQ30tTh%^SZHqK<`MJtmzBUK#wUxy`q)F*TFr9-Edo(h$HTw6?eI2E(9 zGVm=SnckjjF{r+5rFYh!4co;nw1)Y@I`D!u?72<&%uCxNZ!CeSAqzf=IU7>3iCmjV zhkqD(2(d#42Zs|t8v${H&$f1S>*Ih2!9>)i%jW8Rw6*MdA`f@nOS>XA9#sF*)I6WB zY5$WLOpz-m+91ROv#>sclrs$d&YP8eERP=oG`Q zsVsFds~(%KyALZaxWPmrLMi2>hq4v$674?^#m&S5OvP1$4;uUWB#FaHR7jkiSpYQm z9y*Lm+3*;s6XoGv!&tDwRrA*t(kfBCv?(1830n*%U!7+C~ZHz7$SeoR8C3a(9_eyE0Y`y0!wp!t1QWR>$vN?irTqR zWtAJF;d<#_LSgg0jY6E~hXR*61>0XbIik$6*d~Z=+Se8v6PxwvFWwrEXTDT@iTaA1wzh_AH5WWv^4?;Pes-vBe4eI&;I-rF*)9v|?gP7|v`L;Z(FY}E zn@2wYtq5Ve_C#u?G))aRak!@_>?W*4`2p=l&3>Up7I}sAD)S!Uyot%|f~0+U(xL-N zfLImjTQ^ph>t8?8Tojvzb)yV!c0X9b%&bVR#igR)N$rKqIC-a`wW*X$`{oSPXq)=L z=k$8->g0l~#AqK(z7?nD$4KKbB~W}53Zz@>?K!iI{~+6MAOV*~pdV4&^j96$BS$%a z7o}scm}J0Cs86h-zg6A*NDSkBnAk>ceNxp(UWkRaW&n3mPPHPOQQUcMH97fOC5_Ab zn3($vx+o1}y40!l*?^hw?Ct|T;_DBsi`FnRjf{5p0{S5j1{Vw1ir&F!LXD2terOjz z42Ak@7k=7yj#XrNuZa+Cbf3fSUwr4fJV?9iLC2fZGmW9P9+_=wrd1?8nCBl<7&_Oa8yRWcP?CVP&3=a0eKYyf&4z;zo{}v2WF-Wp&~Vy%#WRn zet(b)bT1wT9)HXv_iFnUH`ws*$et81jB7xXjmGa$kVT(u7aLhxMb|CU{PpZkJ2nddf`{JI~9$ZfnJE%X;yXDXxHVeH$rQh>!1mVeYcjC^azD z_5axW@_4HH?_Y_CM-(y-<%y6~GDJ9~D09S-xn#&Z&(mp82zi7eLz(A!o)sz?5{`My zJP*ggG2G8C=k)x()#>}!ecgLs_rKlA-utuHXMNWAUTazU?^d7(ik+Gwx`uAG_)Rpu zq;7F*w!8DC3RC7hZx^jvWZZODCXLPg%)I9i!g+}^@|{5Et)$r-UYZ3w67F5M4Fl@m(aWAL}WnRGCohahk(3;!4 zufguz;0R`vSw`cO!zhLDQ}My=`D-;PvSp{mcivIkw09dfpuJFOwKwitxuDVv9wRF< ztzYSvHe)tgM!iFnSQ$1N90x9hoxXL>!WXo+_MA!x#>_j7Mw7787gb3~>?~<4H#;d} zTv8rjI@5i3D@OyJzb-Wx0I_xYVZyYH4(74sJ2y(v4i1Cw%4cNRiYyoZ?W8qR?*}BD|~z@A<{yfG_3@%82Eg{oI{Ia z;H!^*F`**Tlp6}`wF*-8TznvcHkORcU#xKKJ|VHu>{L%K++)(_<}a+h**jRz6)0-@ zbq8HB#utqFWSrPn*%rV}ipt>#dJ2Zp-i4F#Z1)~}{LZYf&o)=9!9#p*#NjKTl;FLI z>VJ4ykj||#l8jb;t~1v$p0i60qs>5m6Qy_Fa+}M=RIT+d)GzJd&oPLppO9o*(9S>E zmh)iY!eVvtZRg@UyJD5Y6glR*+g$Ez4=BZHlD08b4RECAoRSedRtmXT~vT3W+HXoQ%(7NKTKj z!~tjxUprucq@2%iR_dv_hm@iW0ER+bs)PilWtUlRaw;XYY0hboMdp=*y0&|V8g-%A z?&8}~hb;H8VsxMVOvX~>1Ip9P8yfdWvIV@hM%UZ!B?aV5nt%AG%VHvGG}f?jWLEC< z%f)WwDJ`K~E%m7YFSq3n#f$JNzqid7?P^(S3%%ydM#V^2HA4M?uK-2E{znAtXYawj z5IB~`EmfbXO~?==tnOz{mo9%48I_UrlN@mDEf$Er4?(H23jkIEo-0mN&OJuU3LH4`b4G*-< zyon3+%Qx$|nF-=&OCREbPCrcKEIZWef$p+MimI>RbMfR1XL8w|R#o&>h(?zOd+nZ# z396{t>MgtFdyoE>T$ePPPO z5?GG`Kymdr+fw}AF==%HMp%^agtUBY3UTK4D4W4hn_PoVGL7!(>V-aLDd>)l4Uo^I#-ObkZ5 
zd_uu0wF{00N4W!pOUDby14WJ(dergOd+b2gLC%*-jC$Y3^iaQ}e_gK?8zttk{(wd=blLjk@Z+Gqvbw~`dj85C6krg~dIm6<-M?WKH4d*xwI&)(E9}e7HoGNi zyDTi+Pm{8}6<+?gG)ZS7qk5X_aY69K8<0a|rFiqo?@cjfXnaJIJ>wDfGhYa&G|D)d zFA6koeNg(yiprcT^bg7Hwqf&{W80xx5TA?KT65^nh0SpGM+~czS+n0e96qfLi9~?3audC#WI^V`Q*KFUjkL0PCj}o)P&G^dt zvijbFnb$I`z8MnB!MR4(>!G+WubP{MU|uEMDsT?j0=;`Pb628uHYaU@l)11>fr8x!FLa+mV-Vr*rH zi>n|@0qMRRVwg4PK3|eEFA#lWT^INYAT3+pzUAw+nFLvY_3pc7gWHzOl4s5=Z_`l8 zHBhtex#fmvuZimY1{h?eqjnI@Av>E(VO6D4yP&F$l_XPnb-kL8fCxhBtI%I5M_S3QWakQO%K({pwUnNlD7MUN~18^W-BotXdRELmt^n%h@kE7t&19<#d~e(h2mZ&sD3 z{1a{mp;jc_OJZ`)hi-H8WpshGr*yxWiye)+>Yd}5a6Q%vu2*N4E7~{~D{bdX48`OV zU>)3;c_#nDZq9kVJkHFLsl@g1M|AZCUh8jPh&FGuh2L2LOzPEVRdp7%0Cd$?2~4-M zoa$H3FP+)JjH29TZ{#TSd@!q#(>>C1 z%WIpya;4etb)1Q<^fMDK+W5nl#CqPavV%~RIK1(y zm)GtSEi#4p#fMM{DV@fSzur5~!F6dJk_gAvo&_nki2N3vLl?8DMtF4NoU&yb4pmP0 z>zyLM7L_~b8B7R}tSllCs|ck*W!EiTGWm+#p$84;6~ru}N_Tgb?oWtkEo$~~&*+da zpRp`9nbVCY-Llyun1xcaiw02+oO4z| zKpM+=ZqEc`+77AfZM~Nvlj+MS-r1xtpUuscDD1NTvVPCY>(*dspfdA9P0H?dK#@US^BCX8Lqa(#O$slkWIT)Fcr zPQwh!HSi`bA~}GOBX&2>W+uuhusjSXoG$y(0512hlPpaK!cPZ0TA85hVRo!%u$iu- zgR3sTuZZy+#E`8_1M_E_7`SzxtK2u-^lr0@X! z#MWw3aNt(jn5;}UOBs3qvOS_~R}AU&m>gRij5Jg?RaovzE$O|AF@3f?`RQ4~%bWp24R8GF{^ z!0C$fYrEsZGL>E^?xc5|N(-e9n0_ZZ#|c@3KC~2@UrYJ!c8J8vxLVo>o2#45yco)R z{|HN#*Y^7=F2;!~QF_+z4(H7A>Ykli=?C$z8S>{K!OI+=ldb8c3+gDb4F8`Oty7Q@ zTuJye2DYELtyyk2$JX$e{;H(cj@>b9`-T0K@1%kxL{;iGTilI4b?ZNX+)5^^F4HvU z(N3*~L93j)=Z+OTEFUAPb_55C(F2Aue+3G+@0ZvbUJplC7L7DgN^Y9OvuIVi)DRo! zD%GUryL}0?a+XR50TF6h-NiJ$I=XN^-ZU=aMo;V2&yQHL{g5F_5)59IyF2THZu8s^ z<1DKtqMcWB8b%&5S={V;J^cuHxQ|~sYHB;q6>wM_8n7tp8tq?TUD>kFA8y9X!MmnR zJ>l@J_2SWw^CduFOa@eGVzB_DE$Du@S~lMWW{>6z!KS-FL_cDwlg$D#@2@GP3zcH? z;2SRQ+6i~D3&|B&<=^NlGE#6~&wl%{XKSi_B{h2Rz-g@@z*&KSY!2_*X@5soumxX0 zBvr8K=E;Wy0DbvtCyuQy9K@My%K9xMEsj6R(#;zS3z68&3c7d%ge6j`*a2b}_(rZX znPT_K+L%;&*~VASZdL6vX;pnak+Z@yNZ;Z*E|50O(f=xta%$(4FKaOTm zv-q;NVW)ii9S2Hu`)3{#AUNqj+>4Qw)p9vC6mJ^n4uSz-OdBS?#l3>$IjQ&St>3=@ zGTJaHFkN7=rr59}wQKQ0i*LAP=~R3Ju~2?74_jE^-`e@;0gjA;f__IE6(KD{{yZnl zrN*$j?r`mMD%DCsk`lT(8{>KKXI#|mt;&e4R%I}zVCj1xuD6x(BEC`>SOqcA)QsG& zO%YF+ER)gntKbU|frr$N?{T9f5CT)X=XAQZdRQ)$nF< zVa1Dpw+gEyw^zHYA5fT1r+p4!bzdLu+KgjkahZkGh2E(Rr?{D=i-bf*K$e&7z?X3r3u$#uvipWp{7;yO+0~ga!mnhoK4RZH(=B9KYuCdr{`c2j{Jvd2uh+xdJ%*K0V-JODS1}$n^sfk zY%K8UFs|q={}Zg+CeDu;B9YnIoMo*p79mHZNob&WaocQn@v-y5C6a(XyDbUKJCcFT&P4jT(;zBYRkksn ztLa)+#!=?H^d9XfHu7YB%mGQe({I|qB}ANioHReOdTZmb-FSX)@>4u*Gh+7x4?@(s zjp8K%D1d^ji7s?}NIlIOzW&xhPR$| z_>ta5ds4(}docO}z*Gq}@=h$^8%5#(mBb4q&jU75POHvk+hRW?x$=oyFlMlWR#IZV zn7hCCN#s1R8l87*NP-S(2B2qEDMi?!1g-F!^DV&Vx&;;+;b7E`2Xd{P02|$}0c@BY zSBFr;QV1v!b!?FhU4}whfhYkt-<(}v{ ztA^4RXKB@rEP!ugg!(AxTL<9*E4|=`vX#pl?x1q5;muY5<9WkA!MC#Y6YJyj08R7D zt4EbC9tHrl2H%MoJ)2XJmgP2EtLvi%Ad%Jo+<*nj8+vSFM#IGBlQr}Nd>SH-sN5jx z(E%1&zj4?4FOJln9mm3PW^`Y=_Gvh&9&Hv)B>=R|(ChC9a1K}oA$)5GowFcZc^Jyw z-J+voakyFT+Q?@%S9oj=l3XIo>3L8vmD_DVk({o+XsuruI#NFE!^en7&&R;N`^sdG zAZ#p1IaVH5!xJci#gZX>NU~KdxV;VO(s_(NWPo5odbL52B%H4D>ETtASC_KN z<{5h{M`-y*g9BBzK9)?>3xe1b+va1hCoS76lHc5X!)f?Q&=!F9J;kA;o-v`i-%a{y z40{D05kqAXTMcraP{NqylE&JlNCNf!NnLSR$~8f;d*+o`e7K}dAT>IWs#Z87c@#ne zUhrwP#o@$$PR5%!fcUa{%q0}IJ&Y#g&tBz}eTjR_&lhUnye=@oBk&*|~U>`7i7M5p0(IU*8j3K?IaH4~JF(&v6eZCG!1mL>1Xtq+Vpw77Z{$NV`@i zynA2>5hGf>0Q+uDHT|8fCl~BA3tSzrzu_MJuM2E`Veh4k{9w17?UeA;*t|FlUd3>` z16NU6{Se5Wqd*_wi1Qy8ut+Gi|KSSbfxW*07?`7{%~Nb?@nB$J`WHd(0yA9Wq1Ywx zmH=V&mEXRG0Kou~B@u_!qi_zWJZQmZ_ydiNCGC&z{}E8zBY>tXV4{RXxt#y_hJU*+ zqoBXdtshgC0zkJG)AAB7+*^Ns&8H_o6FkkHevW;N{$=oS8)<9d zbs$diykwYN%18-|y@`!lu7l!2}!tj*aIamE+%+Zlrm@6e}D)m4Cy< zjrWUVQJ23sY5GRr%w!Lu+l(rXOCytW^U>X(r*L4tSg5aQgb@$T@4+$b$}*6?X`6@N 
zqf2j-BS;I(OU8EThLJ;TUi~*Wo;uT5Q|NXulnhmFf?c2q)T^~HQe{6D=l3)JP(2~t zcd8M`C9%h#qLg4ZMjp4$>T1~Cb1!`^4ocy+=oc5C9zu3t@-(gtW2`R|+$81Y#TjAQa*Y5&55CAqqZk)tf zda<_&$?1ePz4LayYI-r5Q8>hto#DVK;S-O{z?h@{pr;A1Hwr4p)GEz zE-Z>@yg2|wCqx|*v1LBLygd)Jnh`A9iN>}BngmiVy*bj35&ZLz28mj;z zpq4A#Nmp;+i1_mbH1@q?JP{Wr09MJ$_|8kD|B30)8*w zTDuAJSgU2r^b)ATaHv?|@qzN#PQrW?kItE7hrZw+GdLX1VD(+f&$HV~VS(26fv}!!Fml*0ygy6S zcs>B9l7He)2wF}FimHi$m{$#pu-Vh^Q9RF>n4i87<_AXKFn$bM6cHn9HP1)_yZU%Z zOMpg2Y1G94T!$$Ad{9d8h~NjEep&$VCB(=+f#&nxCErRp^W)Oa;uwpD_~_}ocmyRZ z)`Hkb*gI4stFqL>h5{QsYF#39teRdwLGX*W?HwFI=sdRS(k1BzszMBpfW&t`j};*P zF5#M)%WmMYDD9tC&yr*_OLd*~Z-RY4VTeQ2*%-CQ3dO^}H<3hD_16G^+@m8k>0d?W zbH~`-J6Kmt@Zp$4$j-|U@POV+CzW)BVc~M_*O~GFr}Ha_ZwCe z{tuZHaRaJ&y>8Nx`d-iW`YPq2`ZvVnZ$OC_Lw`F?X(=Cv*o;<6D#xUOC9bF_9JHwa z?L$GR&!m(<{dBnh&dyHoL4cU20?=3%fnx`-h<_8oy&9tJ!9lB+@_+sHSANf#c6|1C zEGHt7T;IXLA%NitmAZmm8urxc*MoRH9Jtg$F3pRJi%%tc_y0=FVI}_g2e2>1d*UuA z=Wwm5&)6G~ef)My_3pu^bd?~+8F(xo-ykwk>DVbyUIq=}4b2>`sR?{Z54qaksCvGj zy00WytMcNel9+}3%c=Dm0CR00S3u;~vSQ)!Vv8D@`2g8K!DxI3G+?OMkxa z@*>apC+?>PtIY|u@C@;aR6NXwQ(ytst)B?*A=5GnO~=VPq<=3BVEXFBnquB+@x1dsYPr1X zpxFVanvgb{$pD5|P5N;+XE)M#K{ZzeV;x$gaZc61{CvbS70K-f4>&C7#49?@rmXDs zQogk7@p+7W?rr%`!2Unp~>i6Ye4=bXW4I3+gWu5`mxoO=1i(yPtM^JPH4b{o)~rpL1EY-Yg$ zDkHc1nqwO%Ft_WF8cl{eFjx5P>0eLxV%L> z22adKo6?xp@hCsF6Jl3pm>$ zsFv056jPizlNGZ}Usl*{bSH}k*hjIMwVFDG<{ur#6vgfHC-E_BsmzG>AGVMU$N6kPlzsi-?8Fhvg$T!Kfva{5S*|gT=2nc zxD05JRksZL+3z)jTF%O{csQTERRkhWg7Ov6e5&~VBmFCzR34~lZf-izb&7Q^j8To+ zmC=eWQPc6ty(_IL(QNbZUU=RCKh9L%jK`*zVIc?nrfCRy+^s9`et6#LT$)0~%D`2u zK|i{<_#7iE={5Q&By}6+^(JP*6f^y>l#?vBIT0>C$2FZ6ZC21z$(gI<@!bS_+17GH z2cubPzg4XXvOP)BqMizI16)bWo{{Xy>h4|fhyu~MQnQhsqiPR{>q4#Q#!L!A^gWlz_H>nrzP|tJ=$!J+VUsvtM_QiV6UJm^`^MnuPol-wl8X%R~=Wm01wT26u#V876TRzu^=mS3F$VMHP5igI0+ zEj=lX{$z@%HyC~@Z%+iT>yMHqe!#iOF5D3o7|+<9@#ZpnihYiP>(^Tqf&0_Wy}H?Q z=88CmClldH{CPMP-7!a6KL{60pbiYoK2AOwF&| zXwi|M-l%aBcPhNzjQZ=$^R@6DFVUKK*YtfJfIj*{#M5B>pRqpQqYi1LnuSS5^nw)1 z-79?}(**UGkir`J{iLAiJ>~THODC#QrDpOrq(-`bYZ?HnAp!<+V)w5nH#{yy^&LJD z8nG$I=9$DX-X_@Ztx_S;XLY|I9TaB)K=LJoD`?v$c2`~YmPPfc0*jmP%~zkp%j7h3 zJeDKp2Wj|GKRUGX73DmbdD3 zZ4(pE+F;zN8RUp>-J};mqR#NdFIWa^Tm1XKzV*ms96pc7HTd^!|C^f;7Tx$Njl}}| zhJgRqHYWT(mgxUa5*oPEAl((81sYhbF}*sBPqKdJcY)&|PLG-SDJdx(YVvaF6uxlM-gEVjgHZR( z0{};$o1C6zGynnUrA9)m#Q4n?fr1ApdHVcC-xY6I8QD$XvInRUr0E#<731?YAbIew zvVKi@5GbqTl*gA9{U(57q22%y=9fWRC)g-4^QA8QDUk1Uodb03I4y9G8KiEZSx^E| z01NkTiS+Zg#g0NbXU~HK>_8+pGJby!zMD)zooK3>q8LDHN`r5)NNj`s;T7Jnx8NO& zit`AtTcCrsdS25L*FWAf1BI@ewNn5{g8>xbM;z1W|GlsN{b`6p&~zK`>p~zS!wFK$ zxtIU+76ODOSej|*7ciip5Tt5MBU8?US$X(xYlY%`J|S-GAP2w+2yQO$22UT2?>yHk z3I!Ae;5;W_`aVRUm)Q4AMRO~)13r)U`+nv71EQ6EiU-xQ%6Tx5arnBjk-G*;aqUch zk6+*0Nl{s_NpXq2pcm2~dy?#NN&%Gn^Mq9IvHyW0u-IYnc@$h|7*OpB z;Dy~rSH%B#j|1RvP2R zCBF$l=f8V0-mqSXrJ8R32y6cC$o~l3B8)&ITGK*?8#pKYxA(9BuIPNp8w$!Jq`)f$ zh|;wF$VorS5USsSd(?3O*WbMb;3NPqWZ3qA8U`V7hlN1s0c@b?w`Z3T0!UfgQi2g4 z-FxCTJT$b)2*hxTx@Fb=_-X8|cfqR^gpPd#P@_L5J@+^sGQ8%_g+PVWP2f`S%e_A{ zD>Q3lG@Jkoh6i16_N4XCG7I-htU92%=2$)lFlC8$B;A|{SzOe{Db}wZy-HMPNW~mb zcA1y-xSrri!rXlCC;FnK?>o=?n;*oq!6q%&P9VarQpMkDpB5qei)_Ci=^bPGEBYp| zhmN&f$9c}TJ{ZdOm@wiLOp!g2^0!*3X0%%%iK@dcx)NQo>m$KcWtRnF`{oIc{=j|& zh*ZF*YEgX;flQ8o_HORK5PsW$Q#)F8A>l^`buA(7%>EOj#~y?fhJFYiD7y_N_9M=Q z5CD0%?EiEhK;<0)T^?{o%Tbolr#jL3(y$e1MT5~=XAy~1^l~uM^nK|y!Uw{tF>Y(7kny`*gjU(#5AgIL~vWC}ESDO~Y-~Vxn2n?D) zc|vvS8Z@u(>50Du!@xA3Dfox@tgNhPlXh%u_V=`S4s{fN`oag`ejSOxwjU=>JD|xK z;%iT)o+AVvc01cF-o{Ku>cpR^0ks|BIbjA$%Q@9z&v=LWG0Zd8s_fUOQ>?Px9*Vcj z`|u}qP#q7>mcr)IlYoh3NuM*O%G!nZAW;uOySNSh$Wq@2x@@b0fi4`R&s}x&x%Z{t z@p$DqhWfL=2F?p;!G1oyS|jrqGhh1HW{r=B!YOP<;}-#x?;=b78gP3ryR(8S>o8*3 
z3*Dx%JD4+lz;Bjqy!UcC{{(h+7{sb zlgvye0VmG2>x3ZCQkDpntYge(1dUVh(SakBe)C(zjE+4?)$cSYte5~UE?>Mor zuzVgEIQ0AWSU74>5o+iX0*Rp;dj=MIRNMJ}hxNWwNJ^otncKiJ#4mF6F9W`%9$5i1 z^HA){9|a#89MmuI%w~1e3HZ~kv5@K2VIu^Fu7sR~ZhcFftt~0y%Vcc&C;hBU0&aE4 zzApPWOn-zhF6zG8DQ`iC>2}~Xe2grQi{t3&?d8&a_Fs&V^MN|qJh8|JMdi}e^m8=n zfX_48m=$`^e_VbEB-hQVGQwc0uq~@h9I#-y2>bi{KX-J5!YxF*P4fxDW=Ll4J_yBi$HMW; ze_W(O9*v>(m&5F5HV7Xj=_gQUtDNN3xgnx$w>|WAA(HG5EaVO2gACh|%w%5zgdZ7A zv}0%{oxDXcv;f{xmIL_WzTcMO01YQX_9lUGg7qBF?yCc!iK*STeu~QGpFG}g;|xqI z#fw0J!ST)g8XP(oMCiG}(ye8W0)+UV7G4eJRi9b_U1$`z&xEOF16dK#_WsSEgd+js zGtkn~!fn9OJwSVMj0QSuljB-AFvRaz=41%i>X+;k!1LTb_}rEZNcHWxG+Zm^-z^sg z6ZTK!iTT;VLE0O(AU#P4Kj+tNqiytsAdk8snq$XF>Oc#7M{zMnB zJg}jI(J3zo5a-CA+EctHP-l2efe)VWO%5~O1Ul@JaAz)jW>F4zCGMz9Ou{7O?V12v;?Oxc+Xa9iY^UcNtSdU?=(&#d<0 z0RVD7Ohl$sW4o!wF`Ej2)&FrU{$l}%kC3@J&;{B$xBJ4JoSeK8Ko5^yFFc*Pu-~VD z9_WV85C02;=-VO?x50;%gMsgVTGBV2M4tqLCnCca@#Xb!c0ITN43AyczkmJ}E7gO6 z&4P*|L4P`hd%6*XImGbO5T`^DHb7zoP&XU=mmdj4?4kdP8X%bW*hyQ<4|Ih9E-$82qH-rGmM9osWLc#Ow!(B#u!>~V#AtfM&me%sUu8xtS*(a~E+#pZ%dA6{(ygxsl*#r2Y@c`v0Uobap$ACifDklW8WjhE}T;=rOG9 z9aI!&>*vph=jP^6>V>ZOv|0Njm=1CF!_4WCD3!kv5wyvu5Pcq(fLl$Lezlv#A8j1w zu5c4aaPercwIq2_CVC-B4LTY;Q}Bq`SP7!(5b^3!yP#?G@%fF%FGUSyPig0y zw0jy%wQ7wo)t@V>G@w2?#=_A1#9$_Bpvtu1ljzJ%v(&zev?v{m0^cm-4mBmzbVvkqx-auBYFfd(e0FwD!7WOOR0WN{f<7^k>$2kIC1cOK)yrYODO8V-}9o zYS0c|rpB|G=Y3&K2eRwkdOBdm9FH^()SjTFa6kXS{|=}nI3El<@z#LjELj*umVhSx zz49#MM^f^me2sPqq_tCLsbtHxe$>Jy z($*yr2xWEAOJ=8%j1s8SMQPJIv&;&RKX2`iUrZHZ6A3jEUb%)|hO=a3YzHb9rWPQt z??nM&83*mrtE=HRrw`sze4ARpauZL`36??LeuW_I`H)(}>eM!zj7+CvlQ#hi`Z`K{ zK(YSFyVSb=8`QrSnJ{s3c~=6n{FwtJiseVjQzww}bKe&W<2x9ynj>5CruCjiLB1Id zvi*y_NLE8!r_c{LTKUn@QFJL)eURBIc{_Lqz#J(4+bI zxhxj{+MeZ2rM^8M^%2h@;)KUe?>jY|_MzW84~vzPK0*(mI)9&9*c9;r6HcaM9z@2rPweo>(ihE+pykKJMuiG9d#JJ{P_adUH1 zKxT^k$|%7yLg>+CX9BbMFIk(et~?bIqgdLLKED*3ktg6v$H#b1MUk$t_MJ-7>%gSF2DQ51-UYKk+<_k!C2s7+6UA;#C!_`5?w71KZjDEL zz1<8ixm6=NdP}kIa^P2<<++@s-?3e&iN|QH79r+23hgR`iVB%-0>KX!P#z?#%RCN3 zSRM;0)7Yo};tT4zl!GC6v7v||8jDzN_xD!Ko;FNs$K${%-$VEjW&4zQW@jZOwauKB zzzQx{D3M>({SG7&p3tp;9pHjH^Z&Yt6(ZsR6t-W}V&j8p+ zOR4W5AHzDAkCRd>?k3;9Jj9 zxj)=zNTFX)P;eD|e)O%vJb`nniRVk>P*_v znW=j`%KljPIUfhDgJBhg66AO!h!bu6F` zA^2G?;FNM97gj%)YMv0=kV@;MVfK{ z>b|{wggRT5)8qcY5M*F@m=X|pK6LsD@NE=@aFwqJS1Oa4*{Alj!>~r9JkXVA8Wf4+ zi#XGV!$5Q!2>Ro_-4`<7T*RRhm16*9n%Lpu`nF&LIFW<1xQG0F(E(~cQ!7gtuxp_J zWnCTFT4uL@{!`p9Z^2Sr1aaH=5n8H{?0&3p!2QRMi*~5pTa@|pbJ*9QpnL>nS-dnJ zIHwkr{EY^5fcVi8xE5KBocflq)CKR zJwIJk(}O@*y>|jG`BMF!&>|}8bD7DF%n)AEinBkz2KP$Jhz$O8M%^^5)5ktap)j>~>jqCTU2|AYi z$1y=nL-EtWzNfec{d^GuRZN!b7-0?R^Pr`Px|Bsy9UekFht3J%Kc{g|O90FmIv$S) zt_B;0_O)486FZ1azYP)t9^^E?bkx4{r7~1@&+{M>TWiw#9)zFigm&Yg1%IED-fT{jP)|+*iOAO&_D<)Ne~+#SHBoeH@4#)7RKz>HP~b zC%b_)mkwSghsFSbjkh}_dQwxkH@)OWI;8JHAjb7wPsF1$UeF)_Xrj#&_-UMmH=rfC z5JhX2o)_w|s_MIM-16eETd7aBi!K4M8e+$Zz~2H07pQZNNK?D?^OybjfYOY2Iry+u_rKWt*t_84|4Fe2_@1N0WLfMNtwI0Ou71a~Iu)*r#QmUOk>&JU zQ`4@$%gV_1f+sq@!vG|r7`4EG&p{)nvB&JknsLAQ0BPjHOG=e&hNw)6_EVsg5GnFu z=(V8Znk#_oWTo`gW#u)nF4g+;kNZR+2Ux|-bFY^qon-~QxCz+|q`!D3_Tk^Lb7GdD z*M$7g66nC*iY99HE=BxnK&C^M@er)(?4(jW_&kLb5AE(4$;i#-{`bQ}V6x&oTxL)QayFA~V_K zkyNI;9M*H71J6>pZbXC4tYujpLTvsCbe?Dd$S{>O?GjFcVyW?iP^KX9ljb8V141HC zPqzXk60f&LnJ=(5C|OxJ78WPj^|!cU+2-_mCx!6W497vx0Ersimv^1$R0y;bAiT8t z+YLt0iPBX4H*<+pAc>;MJ?87$>F3_q>#$Kp-p z04WOBqy&jLuj)4j2CnzB?L3RbG+TKJ3>4Kf8f1>;f{A)1D-#pZT4f0*EJZQ18kQwv z^%vDsDEnMn#B&y28jqLiIj(-rK^aS^PLERbFNDJN0x0Kl8=nczXMf;4n*}CpuH+j7 zi|!pce-py8HUnf)-fC`4 zna2qq5w`3gz3N=u7NXEg9eGA|i8C3(M(K^9llcpeu~3(m?XT&1cRrFxoEguN^%zPV z7!Voiv$N~Z5?^|^K3h5yw&^T|)>;m)dRITVUJuq=iR!eO{`4|u5?vK!0a_6*U=Rz8 
zjP)bnd?te>x$bIEkmr$f!16ZLVebKqXBC} z4qzDnY~RDuc)V{rT9kHK)+^jC2p|P0CCW=b!T}tZ)rn{vZ*%<(cBFh ztAlvHK8RjczW}nTUVi*1%MQ0D&?pY#))k7I5(rZWV#1P)VSFa^Ftm;8#?yT~GcCaRX^_K6nO@cu!z^!4-G_b++kUyXQD% z{j)j%0&kf(8w@Ve1fQJs{=2i8a&7nkfF6$!R)Ah^7F;o}m`p-L;ejNTD^VJ^H^v$S z*T1w#vN6#-bX0l5^~i;%BeP^IOk_B>(e~}2$5cX2{d5}Kw6v75XwkJEF_I-Vm#?wQ z!S+F`@d0RLO6j@ezmBf+A0311`gS%62W44RIOt@+bHK`d0ApIrKH$M~%2T<(2@^LH zIGlU|5t3DWJhOZ%QCr8&s?5TEz5-o5qrG5U&M(!e?TD{rjXhp zGli3>X(N2RSr>Z@6qY<%=877E>2yWzW>D(cDU@#nSecXU_XoOh6LPVIg*kq|&DCFl zzEjAo>OLr+T0IU1#rT6kO7OQ^qd`GUoqAqR?KFKZe_d<115GHlS&*?B~wf=CetE2=wZ;nVz3?LY2U6Q z3R-cmH!FGmGlN8a5f~B3D&I8K-fh&Y-vTW+F>G$>CWGjV9O>D@4)vVT2?@FdPfV{& z#_R02Gg(!vRhYMIBT6k7sUh9Yn~{Avy>dErsj#R#LGo+CxY)qi@w5Ko&oar}S0#4^ zbUo$@t_dbBkreTIE{BMWi4A_h%)WYX$gi_Wh-qc3rE2lAmoi_Is`J&cQMs^pmUiRs zLX;$$RVd0Kw=2)O5t)abm3h@e4kDek;~}UwYX>U|45o`7-cPgI}IY z_;i8^)CAa)#%Ifqd+)cEN*Gcj(^rU}1DehTW91nl{Eh>uP0dTE?S#$(N9v1`giCu$ z<%? zPJ5tN?OMhHUJfK_wTKH3tYkWco`;jwC*-uby;N-3tkyKERT)zU`?lI zGIsFq&r${!y9}5qR9I*~B7){*`DyD4N-q&hk?mC*pak?pm0Uc%_U&S1Z+Lb<@WwnN7g zw-7~k45YL_s2KzK1PAiTcDo6Wd{Ty{&rC-~2E!CcS0z@J^C?6r?rk2`de=?DDl*?= zq~mx?TgSTgCYgPLJ`#o*109*cssBq%hD@1dD>Um}LD7KdSJ#T5tx?G}JrB5Z)NQih zgfq8Hr#^nMM@KA_tmwAf zN_u`R*)|+ak$)G}4;tII%ogeCq7nM)!}L?KZ>37h?AWl%bi%VTrAYXY2)eJYC1u=x zY^d()G&i$Y)M`g*w~-55)#~6bi;SMT4J29v3ujL@bi9aDd;=hWh5*OO!<$s|JLUt* zZ5jrw=0if)eTjIELHGm-8%-raWJ-4# z`<#F_3*-$HFRbz^ZC-c}S`xGM+sF(!V=SvYUKLM7yTn*HeL4%d0NXLNWy|w{U|M2U zOO%#91J>?marE^E2j_(i7(TN8fy>TAg?$oEr$b`}rnHTq0B3kKi*Q(i=HcaMRfbIF zKfPQ^3|0S#*%}0W3g?0@a4zJXAE@$}(b~QgH>@cdYz7=E`eg^v+oxROUBXUl9SvGV z)#=4}Np!kndJSq?yjZ*Mr;({rnGe^|HP{j(Sve={?;~N2(YCU8J8yBJSJ0lDUqsfl z1{s&suAyQu0aow0;kqyB4)hwu1&p-C1%4Ed%oOeOnD4u`pvP^&?j5Q~fa#RnwcDCS zRt54&Zcn|a+!$Y?d~o?r!qWthU+sW=x4^txct~$56MJ!Ax?$sSR!@;{vGp*;H`lq^ zKxsgGbTtRC@Or_lt<3B@NOY*C!=4zRJz&-5#Gxap;XK`DeJM&NvxIYjCcTML{5;*W z&5wzZ(qNd(*N&ytm6nDB#PI2aLsOb%_7(jmb6G9obb1$tA3BeuB-!eaC^~3b_gdJr z*U?Q-+?egDLmZme9U`&O6%xDqaTH7gIJCYT{+RDX)KNA1L`Lq5VTAX%U#O?|yEk9R zag>uAfIs5Wj&fS8$pJz&W=3EEjNP!SL|5FNkvoCux+Sw|e zt!H}t6jkDY4B+VLxzi7H03=_(>3B%rafnx~VFm}iLhq0*Qse_0N^+ddYs;i}0?gdy zyaWWokyZsfdDAl~kiqiN%9b#MYY_;)$J#g0)uHfuE%+@%`K#MYSzZBrl#=f6r{II2 z7ygS)U!|}0u5&2l=>9KBbS!0EAJkJuA=}{S+?-3xC&}T5U9+k&A3X|dDluz%k3p$G zVc)stF0bA49mtw+x6P_pmWa%Ox?$HCbMTc;fT5u8$@xDybUQTijjnE^gKF%O_>sGl zG6t87jnnf-{G@#3l3Xo3yKcK&%0wt~SKA}OFo@~2OwH922hf$crKqdGb3bLGYDr>@ zyDcQ)uZG)$aKP14M;mK74^4w;f2mvoo_p)myJSaBiN?FMyheMf8i?sKr zuH@WR?ou(Cd+xFJq3nRIbqt-ZYs#(;i+inRZsmNtYQt*gw7fSADy2&uH9v|+DfK|y z8&#ov5ELXqVYY?R;5|;N zAmP?}>1sozYiD|weFe(T6I}qLf4)rBxcMnf7lX+IWcGGFQ^O+`7MD=~O!ok7 zi|>RedFFm~XvQRY`Po(OZfoyWfZ0Y+kY#k`fnu*L5~Jg_Ih9j?2047WSm`)$5*!9vzFou<;ReuhwM+5D`s0)-rw_8{}(#nr8q z(M`RbT$8CO*Bbkt?xDg#Ff*;H&%GzY77@V zc_5ZBLlaaw`KElLts{7f=1c$qq6aD~st~_{?J4xD%(Wr#6M1IL>%3j34H6FfnJ z(J8%9Gd}h+OpePmfl4h$sI7+t&2OaVEZT7m&OCrx*kvs{nl7m`Tm)qA%E!$OFfED% zsmY7(ZUb}DR57z5$cY><)~VIF1rk?`Ac4yN#B`~ zD3v^q#R42uDxS@S%&_InAV=D}WRK^Bj1_g2&rn!*B1oirk7_;LjGhK5Q53e|%wM}k zb9?#|p1>I_c8idfLO+2wy-iIWpppx?pVg|0F*|AN+HhyveLgJnu7{8B?NJX9OOU~o zySTiLM>J#IzU5gZF!aahm5b5mvtI-qgrjYwbx)pjkVhV!KkH=Kw^6;nWP&WKm5I4p zwqSSbKKb(aH6$`)1(h}7A=F0e`9Lg@cco2~T*|0O&-&8VnlIO#UAyKF4!d_asl%CV z-aSK|NTa#QYYrLhkJl5-P(WC@4+uW1avwtM5VXO>md*71r5tGu=)&2#a@Rf4pGgD2%O#Sm7noOHLGhii{HuvN@tE`=%x2NVn} z1@0`gU00scELqrDO6<^&^NT8O2wr^u?~WJ{OF}u8!Mx|!UHda zk+Tc}cUta!v5leid9Y(pbN?8!d(LJ$Wyv2i-^j%E9gySqzZ$#pK&bXFE?X!sJ=<%P zs3==n3`L@agc#Y$QnF;Nrzk`tv{{mr+lnUv`Z1-f2x zyvP#|`xK0CS9z7-3NeagAC4{gBVO#!%>B?)a_SvKw@bON87OWG=!tZ_RWrzbEAv4k0`d44xYZK$kr*SDWBPG)A=}}t?Glolt$-XVW 
z8X?Td07nW8;23l5f!F=`(CG~904)guL8)EuA1Oo}*WIdig*4p_(Swaw;a7!Hq)t6f zCl;obNq($vi$F85TXA8}U@_ZG(G^36+gA)7MDAxOXD>2H+ji#NJ$`}~vnje)#k^rm z;CrlP_$^X4X|9ATOijb3PFaLg_M7$hBfXp4&JRLAxOFmBpXKH*ye}Ln*SJvoPMJ6h z)vryUPe{#vSMl6!C$CvW#;_MKMqq-~VyB8iZN60gNH-x#|J>j0%kStQb;;T`uiYhe zDj~5noit`LOK6R;nW@e6cw@kFViW0Q?75h`4J4V)YZ^u@W&6L1nB3y9SNYc08xgQ6 zK(0`Z51k|M!^X0*Uk)p5^eS7yb^7k#S*_u+>?ClFP*y5y3Q+3bM8gC6{`jpOpX5TQ8(^PDpZ4J5CX zoGW=h@U0rYmhQL%bdy?Uy}oU@;S3HgWnE$nU!9Uz*x6vU43rfZ zo-}(D*LQ~C(Hq?iPNr(++%_`T8;K@;Ol`{1g|-GyE>C_ISQo7JHWar0E3>3-b&uO& z7L~%N9%Y*o1P1$>n%&PM2{_qt-$x%EBBZLLNRtt%4P7^S530m^#Ap^Vwmj{Ph;TL_ zu*OF44qTlm2~=xpNsHPl-lP4OrwAkFa+*Q)U~_6si_N&Rm{Xd5{LSlhwlN2zz?*CU zYd!&=-Au)#6f^-dX=GfUf!_MCY1j32rM~G^%CkPB`Cz8~fUrsj%H}A7HO!L8iDAb(^$iZbM5WY+ z6r1NeWNHuQHRvEQO4qk$NKj|cqiX+F;|C9Tm18B1&VPM646!0H=`kX>NfRT(W!#x^ z1JH6f9;UZrdh@;V!mrScF>Fv9cL{L5Cc67RKpyRHlAJ48q@!Swx))B@s+jpT=n_ej zNCeA6Ni>YjsfSR=bz?B2E-v#x*4g83r$4+i^4@OW1`&@7O3veP-5gFiVB<`yGuA;Q z#|WImmNuSXD(D0)Meuy~;1vZh4`}5lH(5i{F%&eR0>ynj@zWoKx+-ekI%d+Lbln--G@*PW8G zJ$DN%Z^%3q3iBHK;Y#%Lm}^M>1YXeQpIY%8YY{K8$vx=Ar%DI6?wdAlvtWhZaBi*f z_*3VpexC4!FAoYikbpP|?Y}is;U($n#@2>l=Pi0H*V>@9*zgeas`P)+bMI^J?9bCT zb_lF}pq8$DD1qa$N~~dwCZ5qC;VL-(V-XjF@z{dNCypaurYh2_OCzA^6JnUZb9EoS zM0m4-V9XOS(Yd;Nb-HC)IYY=fqGp^3LqWLPmq&BMx`yV#bBUaFp%1{9%CCGBB-PQ~ zL2^e_rvM=mqN4vskSwzXDNcN(Aj3&aED=@;7^FhR?w^B|_@f%yXTD6lb?uND`mS=C zW}sg`eFQJ&HkL`4>Q#OExIxzlee!bru9UqS8^t$Y=KIp&=6HYvF;kME>$iQl%u8>X z_1=WfcGszw)q;kSaJEh?=sPj{DuTleWup>%I%a@g^Pv^c2L0E)=#`y4{VX-(?A3~w zvxW!0Pjsg?%5*>|gwT2SW7|5pjeU77#`4LnW^g%_&$86_10x@C-wxHb*sBQ>mt};ySZ~l zeRw|ACni_1?y@ZPZ8B@g7+&`k;7JLP4cF$h2wEn+O-?}8--VmA+WDMhl1M{XKir#S%9naF#&Njqx!y{M?lhn=8b!Pwn@S1dJyq2k0+8{8MV_ zOr$ilU^bM7MyYp_wSTU6rHwS0bM_|Fj;q~$z2O6!iH-w!9>SB$XPQeT z*^tZ;+~7%4AhjE4!VM(H6}0J5YMoiIFkh(4$!6n&ezu5M*k&7*B-MGAZ~b1ZBK}6& zgydqMGV9#L@4tD}@ruQ67PtWra}&H0c2m0n(hM!c`IsF&N~!bJn>%LtX8!Mzz7}nA z_BC_xf8lFtCaCR<{4RxQH%6)ltTU+_QDoxaM&yE_S)wR;P*x6%ZyKw zHy(k2WLU>RkWWEh`DsB7?647(>AMS3BG~8bmp?b?b|1+fyq7&RjnCPl1JnO`y46nx z(g1XrY`1)}UdSuBWmGKdW;%F{A%^z38!N%@;kE?-YN+=HK2KOj|H@~J*mQT=O9<3~ z`(w1yirY8B(*)Sd`Z^3Gw?*&b`BZh+|B*q?zNexe75!1RiF;h`r(S!)M?=FXv#HTm zOV=kZ)-#_9r>8vJ%OILF4jF|L&(G%4_d`T6(S2Nv8V0Nz<_{|3aV&sHZWOhGK?@1v z_p+0(&;;L)RdRB~o~R3J2Lh!j6OiZRSRiz7T20K*JTUT_efPJwTOB(bGJdwy>~(bt zOFEfLA8HsGlZjr(7xeC^yQ56%->8UbiW; z&51A0Xlw1p`U5NjwhE1lGf3v-z;M0hm5vw@mg-e-{3>33kVmF66ur#~-Fm&_AS5lVOIVL> z(p?Q?-D4m|r=N$ZRc!c`tN&};UBIX8oBDLeG2)c0jSgae`i81@I(U#=FN*82b>wyV ztk*3pFBd7B1veK-_XjcS@5C;twy?~;GH?GBH9-`k^h*rXy*Fpe>6senE`Mwf*&e9? 
z|Gv6z)(SCQ9}QNG09qtQurDW3giCJnY|TjTdqC!>KGq3P(cFuD&IRDWMP`QPW4!}Z z49RYoi0GkqgAMSV{==Q#4~%cu;R~gayk~(+cC%H~L}TEw(!Xh4E~N(C-nI)s85PsN zt9r^OA5>wTS7v-{gc2H96c>V2V6%kM>=&vXJa`a8;!{epI~su%pzwMC?N|$o^m7|o zuP>wWtps82ShU#L2rS-13*u#oEN`Dvxh8G{c7#49MC{Jxi7o_g+pciu?>hqz56ZR} z8roQ-vhb6K8&?3+<+EQU`Tu2p6C;D{2h^#ySiTq#OW`o62JJffKWz&rRmMk`eO`}hoZ6}F!|1B3?Z`tFbn2>ucF5JbR(>t(AcTCoS@bA_MEV}#D5?+e4jEq-Je zFUlE;YJgP7ej2WDus)Zd25C3YA!bec2vC-PIiOMtfp8}E< zZUNtmbL9O@fi3wefFCJpFhvp9(%~)yJ0(1qz~=X^FS7O`l%UA>0xB$7wN5uN)=)vu zUz6X&41n*31?F7idhe5zwInDE?@Z%RHDy%452F6@lG$PSl0BvWn>M8)Bmh{T42+Zn zEtO?*70F!_+~Y&YZ>x@i##Yp2YaDeFB4cBhSvQeu=c#5b;Jub(p8c9k{R&8hqDqUc zLGS8SCi?+2^pCkn77{XFcUfR>Go~Og0{oVG%8Ae;h}q!q6gG)eqRz@W4=W378bivT zXH-;F=EPfJ-c7cpIW4YtW@H|ouM=AtJq3|Mh^;QrV`BLva1j3rz+%hI z*ZIe}&7>u9EqDRJNLhJ);`2E~&Y3nS5tTg5ZSPGv_4~mID3Q`o|IuN1@f3Ch;r$+k z5Z)C!7SrHrg3|BTP(H~NLU>w$PlpyxUr4n{v9Xf6cww?W#HJyZV{n}*<@>)MT-t&e z29l8HE+d6FN|-<*3ly9gF}Z>A$?p>2Kyd69l+W)gEOIl;{TA4{A?CK%DQA~C0H$yn zV@h6repF(jh(ppm9cT&7scD|(v}SODF;Z%rL2WD&FXZ*<7S*F9eI_sl`<|{M-xG!G zEj7+fQ`BA~vz7oPGmDhBFQL`?3Ejo>0?+CX`@X4HP$`A1mzJ9hk%34sSs4SR&5jfR z`goEvO_HoGDJ;Jl0^tS~cnB2zTmv+}-~|)k9wfq1TuT`=N)uU5YK6+%O~Do-KiuyJ zD5FT>RXDFELS5b05DNLt$^d(D)-=!OeB`33SLdR)kBUb4!2}}#vi~M%@_mHf=t$E6 z@n9&^IKPG~w|}T@3C;d`aSEc<|H-9;F1rJ(>(E}_m1r-&IbnSW;UU8Zg6cf8t+BI_ zKp@~YHd3j&y--Urt1CohQH8*ExCrj!kqmQ#_x4LnzYBy3dRH{l=`kn8#Kgpf*)Tos z3Qk+3-au_fmgLQgbIiglg}lQpqo}ZgoN-DtB1VTE;a^NZH~v4K1XF;TSm&A5dUfh$cZd2NOS zXaZx@7V`}$n zMmF{7E>uM`j#zUNz4FAQf%-BCGphd_aFVk`V@>@W%P0kM9-hU5`Ti_P>0x1Vvco-* zy-+}iGT46~0BZ$Q_u>>+(EZGT>vzbaP{BU4DIsG-wLWpz{zD&_2rKSh^-%~*llirf z-q?ge9C@F?oQiB}xE>!{ms~)N%Ty_hXx|0>%{6w3MJK(`iM$f(ql(V!u9Kck;C0rd zRx9N5pFo$N=e648gnS$H<#_#THCP+Y4+G4bmUby>8dOUs^3y7u)|?U)2_sGe?>=Ao z7kpb)$uuW%wRf`M>;BVJ1j@)3rwsKYpf;336OF??fkmOQitIWyAuI-5_|w^K#mlTu zvT&6-;ChjUJuO0do9beZx!Ty=G9RFNohgaGn-qZ~jqrM=vc}@}d@bSz4XM1{Uky zLdriLHy5R9)D_BW)9Ib-U*teLZQgTOuU%tNf<@6#e(Rg5@cTa9pgdVYDoZzRH#T8) zO6}CK{M#vVUcr@f)yMUqB1o#PA}~7;;UV-|lcn_9Wh^au0eV5$;<##SYxnfKoMaO5 zPNh#(jHx9zTHM8tt-)_py)H9pefTw0TU_QR7PO)j277#dCI5`@LK{@9IUQ4__8R5) zo?Mj=yL*9VZt7R>m0Wr>VeiUoP@bJx)c~l_4^YMB)7~BOG%9R~w~RGal2ot_Iy@%L ziFoP4UX}qf^4WU<&slHN}qe9&+duZuzuetx{ z5K85tO*B-s@t?J}#&Q(&_d^et`b4k$GJYBHZ&o0Ge_$QmLPNudJAU-gA|ny#UO~J? 
[binary patch data omitted]
diff --git a/unstructured/ingest/v2/assets/sequence.png b/unstructured/ingest/v2/assets/sequence.png
deleted file mode 100644
index 6b79db305cf4214b6a9ed1d36b3bd7c193aac1c6..0000000000000000000000000000000000000000
GIT binary patch
[binary image data for sequence.png omitted]
diff --git a/unstructured/ingest/v2/assets/sequence.txt b/unstructured/ingest/v2/assets/sequence.txt
deleted file mode 100644
index 618859a6a..000000000
--- a/unstructured/ingest/v2/assets/sequence.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-title Ingest Flow
-
-
-Pipeline->Index: Pipeline.indexer_step.run()
-Index->Data Provider: fetch list of docs with metadata
-Data Provider->Index:
-Index->Local Filesystem: for each record, save the metadata as a json file
-Index->Pipeline: pipeline records a list of files
-Pipeline->Download: Pipeline.downloader_step(records)
-Download->Local Filesystem: Fetch the associated metadata
-Local Filesystem->Download:
-Download->Data Provider: Get raw data from data provider
-Download->Local Filesystem: Persist the data as raw files
-Download->Pipeline: Send back a reference to the local file to process
-Pipeline-->Uncompress: Optionally run if flag set to True
-Uncompress->Local Filesystem: Extract tar and zip files
-Uncompress->Local Filesystem: New metadata records are created for new extracted files
-Uncompress->Pipeline: Send back list of pointers to new metadata files
-Pipeline->Partition: Pipeline.partitioner_step(downloaded_data)
-Partition-->Unstructured Api: If credentials passed in,\npass file data to API for partitioning
-Unstructured Api->Partition:
-Partition->Local Filesystem: Persist results
-Partition->Pipeline: Pointers to persisted results
-Pipeline-->Chunk: Optionally Pipeline.chunker_step.run(records)
-Chunk-->Unstructured Api: If credentials passed in,\npass file data to API for chunking
-Unstructured Api->Chunk:
-Chunk->Local Filesystem: Persist results
-Chunk->Pipeline: Pointers to persisted results
-Pipeline-->Embed: Optionally Pipeline.embed_step.run(records)
-Embed-->Embedder Api: Depending on which embedder\nis chosen, make API calls to provider
-Embed->Local Filesystem: Persist results
-Embed->Pipeline: Pointers to persisted results
-Pipeline->Stage: Optionally Pipeline.stager_step.run(records)
-Stage->Local Filesystem: manipulate the records to prepare them for upload
-Stage->Pipeline: Pointers to persisted results
-Pipeline->Upload: Pipeline.upload_step.run()
-Upload->Data Destination:
-Pipeline->Local Filesystem: Cleanup
diff --git a/unstructured/ingest/v2/cli/README.md b/unstructured/ingest/v2/cli/README.md
deleted file mode 100644
index 4d60d4ccf..000000000
--- a/unstructured/ingest/v2/cli/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Ingest CLI
-This package maps user input from a CLI to the underlying ingest code to run a small ETL pipeline.
-
-## Design Reference
-[cli.py](./cli.py) is the main entrypoint for running the CLI itself. The key point here is the interaction between all
-source and destination connectors.
-
-To manually run the CLI:
-```shell
-PYTHONPATH=. python unstructured/ingest/v2/main.py --help
-```
-
-The `main.py` file simply wraps the generated Click command created in `cli.py`.
-
-### Source Commands
-All source commands are added as subcommands to the parent ingest Click group. This allows each command to map to
-different connectors with shared and unique parameters.
-
-### Destination Commands
-All destination commands are added as subcommands to each parent source command. This allows each invocation of a source
-subcommand to display all possible destination subcommands. The code in [utils.py](./utils.py) helps structure the
-generated text from the Click library to make this approach more intuitive (i.e. it lists subcommands as `Destinations`).
-
-### Configs
-The configs in [configs/](./configs) and the connector-specific ones in [cmds/](./cmds) surface all user parameters
-needed to marshal the input dictionary from Click into the respective configs that make up a full pipeline run.
-Because Click returns a flat dictionary of user inputs, the `extract_config` method in `utils.py` deserializes this
-dictionary into dataclasses that have nested fields (such as access configs).
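The deleted README above leans on `extract_config`, and the removed `cmd.py` below calls it repeatedly as `extract_config(flat_data=options, config=...)`. Its actual implementation lived in `utils.py` and is not part of this hunk, so the following is only a minimal sketch of the idea under that signature; `AccessConfig` and `ConnectionConfig` are hypothetical stand-ins for a connector's configs:

```python
# A minimal sketch, NOT the removed utils.py implementation: rebuild nested
# dataclass configs from the flat dict of options that Click returns.
# AccessConfig and ConnectionConfig are hypothetical example configs.
from dataclasses import dataclass, fields, is_dataclass
from typing import Any, Type, TypeVar

T = TypeVar("T")


@dataclass
class AccessConfig:
    token: str


@dataclass
class ConnectionConfig:
    host: str
    access_config: AccessConfig


def extract_config(flat_data: dict[str, Any], config: Type[T]) -> T:
    kwargs: dict[str, Any] = {}
    for f in fields(config):
        if is_dataclass(f.type):
            # Nested dataclass field: build it from the same flat dict
            # instead of expecting a nested dict in the input.
            kwargs[f.name] = extract_config(flat_data, f.type)
        elif f.name in flat_data:
            kwargs[f.name] = flat_data[f.name]
    return config(**kwargs)


# One flat namespace from Click becomes a config object with nested fields:
conn = extract_config(
    flat_data={"host": "localhost", "token": "abc123"}, config=ConnectionConfig
)
assert conn.access_config.token == "abc123"
```

The point of the recursion is that Click hands back a single flat namespace, so nested configs (such as access configs) have to be reassembled field by field rather than read from nested dictionaries.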
diff --git a/unstructured/ingest/v2/cli/__init__.py b/unstructured/ingest/v2/cli/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/cli/base/__init__.py b/unstructured/ingest/v2/cli/base/__init__.py deleted file mode 100644 index ed07a1684..000000000 --- a/unstructured/ingest/v2/cli/base/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .dest import DestCmd -from .src import SrcCmd - -__all__ = ["SrcCmd", "DestCmd"] diff --git a/unstructured/ingest/v2/cli/base/cmd.py b/unstructured/ingest/v2/cli/base/cmd.py deleted file mode 100644 index 0a5d5c138..000000000 --- a/unstructured/ingest/v2/cli/base/cmd.py +++ /dev/null @@ -1,215 +0,0 @@ -import inspect -from abc import ABC, abstractmethod -from dataclasses import dataclass, field, fields -from typing import Any, Optional, Type, TypeVar - -import click - -from unstructured.ingest.v2.cli.base.importer import import_from_string -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import extract_config -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import Chunker, ChunkerConfig -from unstructured.ingest.v2.processes.connector_registry import ( - DownloaderT, - IndexerT, - UploaderT, - UploadStager, - UploadStagerConfig, - UploadStagerT, - destination_registry, - source_registry, -) -from unstructured.ingest.v2.processes.connectors.local import LocalUploader, LocalUploaderConfig -from unstructured.ingest.v2.processes.embedder import Embedder, EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import Partitioner, PartitionerConfig - -CommandT = TypeVar("CommandT", bound=click.Command) - - -@dataclass -class BaseCmd(ABC): - cmd_name: str - default_configs: list[Type[CliConfig]] = field(default_factory=list) - - @property - def cmd_name_key(self): - return self.cmd_name.replace("-", "_") - - @property - def cli_cmd_name(self): - return self.cmd_name.replace("_", "-") - - @abstractmethod - def cmd(self, ctx: click.Context, **options) -> None: - pass - - def add_options(self, cmd: CommandT, extras: list[Type[CliConfig]]) -> CommandT: - configs = self.default_configs - # make sure what's unique to this cmd appears first - extras.extend(configs) - for config in extras: - try: - config.add_cli_options(cmd=cmd) - except ValueError as e: - raise ValueError(f"failed to set configs from {config.__name__}: {e}") - return cmd - - def get_pipeline( - self, - src: str, - source_options: dict[str, Any], - dest: Optional[str] = None, - destination_options: Optional[dict[str, Any]] = None, - ) -> Pipeline: - logger.debug( - f"creating pipeline from cli using source {src} with options: {source_options}" - ) - pipeline_kwargs: dict[str, Any] = { - "context": self.get_processor_config(options=source_options), - "downloader": self.get_downloader(src=src, options=source_options), - "indexer": self.get_indexer(src=src, options=source_options), - "partitioner": self.get_partitioner(options=source_options), - } - if chunker := self.get_chunker(options=source_options): - pipeline_kwargs["chunker"] = chunker - if embedder := self.get_embeder(options=source_options): - pipeline_kwargs["embedder"] = embedder - if dest: - logger.debug( - f"setting destination on pipeline {dest} with options: {destination_options}" - ) - if uploader_stager := self.get_upload_stager(dest=dest, 
options=destination_options): - pipeline_kwargs["stager"] = uploader_stager - pipeline_kwargs["uploader"] = self.get_uploader(dest=dest, options=destination_options) - else: - # Default to local uploader - # TODO remove after v1 no longer supported - destination_options = destination_options or {} - if "output_dir" not in destination_options: - destination_options["output_dir"] = source_options["output_dir"] - pipeline_kwargs["uploader"] = self.get_default_uploader(options=destination_options) - return Pipeline(**pipeline_kwargs) - - @staticmethod - def get_default_uploader(options: dict[str, Any]) -> UploaderT: - uploader_config = extract_config(flat_data=options, config=LocalUploaderConfig) - return LocalUploader(upload_config=uploader_config) - - @staticmethod - def get_chunker(options: dict[str, Any]) -> Optional[Chunker]: - chunker_config = extract_config(flat_data=options, config=ChunkerConfig) - if not chunker_config.chunking_strategy: - return None - return Chunker(config=chunker_config) - - @staticmethod - def get_embeder(options: dict[str, Any]) -> Optional[Embedder]: - embedder_config = extract_config(flat_data=options, config=EmbedderConfig) - if not embedder_config.embedding_provider: - return None - return Embedder(config=embedder_config) - - @staticmethod - def get_partitioner(options: dict[str, Any]) -> Partitioner: - partitioner_config = extract_config(flat_data=options, config=PartitionerConfig) - return Partitioner(config=partitioner_config) - - @staticmethod - def get_processor_config(options: dict[str, Any]) -> ProcessorConfig: - return extract_config(flat_data=options, config=ProcessorConfig) - - @staticmethod - def get_indexer(src: str, options: dict[str, Any]) -> IndexerT: - source_entry = source_registry[src] - indexer_kwargs: dict[str, Any] = {} - if indexer_config_cls := source_entry.indexer_config: - indexer_kwargs["index_config"] = extract_config( - flat_data=options, config=indexer_config_cls - ) - if connection_config_cls := source_entry.connection_config: - indexer_kwargs["connection_config"] = extract_config( - flat_data=options, config=connection_config_cls - ) - indexer_cls = source_entry.indexer - return indexer_cls(**indexer_kwargs) - - @staticmethod - def get_downloader(src: str, options: dict[str, Any]) -> DownloaderT: - source_entry = source_registry[src] - downloader_kwargs: dict[str, Any] = {} - if downloader_config_cls := source_entry.downloader_config: - downloader_kwargs["download_config"] = extract_config( - flat_data=options, config=downloader_config_cls - ) - if connection_config_cls := source_entry.connection_config: - downloader_kwargs["connection_config"] = extract_config( - flat_data=options, config=connection_config_cls - ) - downloader_cls = source_entry.downloader - return downloader_cls(**downloader_kwargs) - - @staticmethod - def get_custom_stager( - stager_reference: str, stager_config_kwargs: Optional[dict] = None - ) -> Optional[UploadStagerT]: - uploader_cls = import_from_string(stager_reference) - if not inspect.isclass(uploader_cls): - raise ValueError( - f"custom stager must be a reference to a python class, got: {type(uploader_cls)}" - ) - if not issubclass(uploader_cls, UploadStager): - raise ValueError( - "custom stager must be an implementation of the UploadStager interface" - ) - fields_dict = {f.name: f.type for f in fields(uploader_cls)} - upload_stager_config_cls = fields_dict["upload_stager_config"] - if not inspect.isclass(upload_stager_config_cls): - raise ValueError( - f"custom stager config must be a class, got: 
{type(upload_stager_config_cls)}" - ) - if not issubclass(upload_stager_config_cls, UploadStagerConfig): - raise ValueError( - "custom stager config must be an implementation " - "of the UploadStagerUploadStagerConfig interface" - ) - upload_stager_kwargs: dict[str, Any] = {} - if stager_config_kwargs: - upload_stager_kwargs["upload_stager_config"] = upload_stager_config_cls( - **stager_config_kwargs - ) - return uploader_cls(**upload_stager_kwargs) - - @staticmethod - def get_upload_stager(dest: str, options: dict[str, Any]) -> Optional[UploadStagerT]: - if custom_stager := options.get("custom_stager"): - return BaseCmd.get_custom_stager( - stager_reference=custom_stager, - stager_config_kwargs=options.get("custom_stager_config_kwargs"), - ) - dest_entry = destination_registry[dest] - upload_stager_kwargs: dict[str, Any] = {} - if upload_stager_config_cls := dest_entry.upload_stager_config: - upload_stager_kwargs["upload_stager_config"] = extract_config( - flat_data=options, config=upload_stager_config_cls - ) - if upload_stager_cls := dest_entry.upload_stager: - return upload_stager_cls(**upload_stager_kwargs) - return None - - @staticmethod - def get_uploader(dest, options: dict[str, Any]) -> UploaderT: - dest_entry = destination_registry[dest] - uploader_kwargs: dict[str, Any] = {} - if uploader_config_cls := dest_entry.uploader_config: - uploader_kwargs["upload_config"] = extract_config( - flat_data=options, config=uploader_config_cls - ) - if connection_config_cls := dest_entry.connection_config: - uploader_kwargs["connection_config"] = extract_config( - flat_data=options, config=connection_config_cls - ) - uploader_cls = dest_entry.uploader - return uploader_cls(**uploader_kwargs) diff --git a/unstructured/ingest/v2/cli/base/dest.py b/unstructured/ingest/v2/cli/base/dest.py deleted file mode 100644 index b1703dcc8..000000000 --- a/unstructured/ingest/v2/cli/base/dest.py +++ /dev/null @@ -1,76 +0,0 @@ -import logging -from dataclasses import dataclass -from typing import Optional, Type - -import click - -from unstructured.ingest.v2.cli.base.cmd import BaseCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Dict, conform_click_options -from unstructured.ingest.v2.logger import logger - - -@dataclass -class DestCmd(BaseCmd): - connection_config: Optional[Type[CliConfig]] = None - uploader_config: Optional[Type[CliConfig]] = None - upload_stager_config: Optional[Type[CliConfig]] = None - - def cmd(self, ctx: click.Context, **options) -> None: - logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO) - if not ctx.parent: - raise click.ClickException("destination command called without a parent") - if not ctx.parent.info_name: - raise click.ClickException("parent command missing info name") - source_cmd = ctx.parent.info_name.replace("-", "_") - source_options: dict = ctx.parent.params if ctx.parent else {} - conform_click_options(options) - try: - pipeline = self.get_pipeline( - src=source_cmd, - source_options=source_options, - dest=self.cmd_name, - destination_options=options, - ) - pipeline.run() - except Exception as e: - logger.error(f"failed to run destination command {self.cmd_name}: {e}", exc_info=True) - raise click.ClickException(str(e)) from e - - def get_cmd(self) -> click.Command: - # Dynamically create the command without the use of click decorators - fn = self.cmd - fn = click.pass_context(fn) - cmd = click.command(fn) - if not isinstance(cmd, click.core.Command): - raise 
ValueError(f"generated command was not of expected type Command: {type(cmd)}") - cmd.name = self.cli_cmd_name - cmd.short_help = "v2" - cmd.invoke_without_command = True - extras = [ - x - for x in [self.uploader_config, self.upload_stager_config, self.connection_config] - if x - ] - self.add_options(cmd, extras=extras) - cmd.params.append( - click.Option( - ["--custom-stager"], - required=False, - type=str, - default=None, - help="Pass a pointer to a custom upload stager to use, " - "must be in format ':'", - ) - ) - cmd.params.append( - click.Option( - ["--custom-stager-config-kwargs"], - required=False, - type=Dict(), - default=None, - help="Any kwargs to instantiate the configuration " - "associated with the customer stager", - ) - ) - return cmd diff --git a/unstructured/ingest/v2/cli/base/importer.py b/unstructured/ingest/v2/cli/base/importer.py deleted file mode 100644 index f77520ee1..000000000 --- a/unstructured/ingest/v2/cli/base/importer.py +++ /dev/null @@ -1,34 +0,0 @@ -import importlib -from typing import Any - - -class ImportFromStringError(Exception): - pass - - -def import_from_string(import_str: Any) -> Any: - if not isinstance(import_str, str): - return import_str - - module_str, _, attrs_str = import_str.partition(":") - if not module_str or not attrs_str: - message = 'Import string "{import_str}" must be in format ":".' - raise ImportFromStringError(message.format(import_str=import_str)) - - try: - module = importlib.import_module(module_str) - except ModuleNotFoundError as exc: - if exc.name != module_str: - raise exc from None - message = 'Could not import module "{module_str}".' - raise ImportFromStringError(message.format(module_str=module_str)) - - instance = module - try: - for attr_str in attrs_str.split("."): - instance = getattr(instance, attr_str) - except AttributeError: - message = 'Attribute "{attrs_str}" not found in module "{module_str}".' 
- raise ImportFromStringError(message.format(attrs_str=attrs_str, module_str=module_str)) - - return instance diff --git a/unstructured/ingest/v2/cli/base/src.py b/unstructured/ingest/v2/cli/base/src.py deleted file mode 100644 index 9ec350cad..000000000 --- a/unstructured/ingest/v2/cli/base/src.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging -from dataclasses import dataclass, field -from typing import Any, Optional, Type - -import click - -from unstructured.ingest.v2.cli.base.cmd import BaseCmd -from unstructured.ingest.v2.cli.configs import ( - ChunkerCliConfig, - EmbedderCliConfig, - PartitionerCliConfig, - ProcessorCliConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Group, conform_click_options -from unstructured.ingest.v2.logger import logger - - -@dataclass -class SrcCmd(BaseCmd): - indexer_config: Optional[Type[CliConfig]] = None - downloader_config: Optional[Type[CliConfig]] = None - connection_config: Optional[Type[CliConfig]] = None - default_configs: list[CliConfig] = field( - default_factory=lambda: [ - ProcessorCliConfig, - PartitionerCliConfig, - EmbedderCliConfig, - ChunkerCliConfig, - ] - ) - - def cmd(self, ctx: click.Context, **options: dict[str, Any]) -> None: - if ctx.invoked_subcommand: - return - - conform_click_options(options) - logger.setLevel(logging.DEBUG if options.get("verbose", False) else logging.INFO) - try: - pipeline = self.get_pipeline(src=self.cmd_name, source_options=options) - pipeline.run() - except Exception as e: - logger.error(f"failed to run source command {self.cmd_name}: {e}", exc_info=True) - raise click.ClickException(str(e)) from e - - def get_cmd(self) -> click.Group: - # Dynamically create the command without the use of click decorators - fn = self.cmd - fn = click.pass_context(fn) - cmd = click.group(fn, cls=Group) - if not isinstance(cmd, click.core.Group): - raise ValueError(f"generated src command was not of expected type Group: {type(cmd)}") - cmd.name = self.cli_cmd_name - cmd.short_help = "v2" - cmd.invoke_without_command = True - extras = [ - x for x in [self.indexer_config, self.downloader_config, self.connection_config] if x - ] - self.add_options(cmd, extras=extras) - - # TODO remove after v1 no longer supported - cmd.params.append( - click.Option( - ["--output-dir"], - required=False, - type=str, - help="Local path to write partitioned output to", - ) - ) - return cmd diff --git a/unstructured/ingest/v2/cli/cli.py b/unstructured/ingest/v2/cli/cli.py deleted file mode 100644 index a53c43565..000000000 --- a/unstructured/ingest/v2/cli/cli.py +++ /dev/null @@ -1,24 +0,0 @@ -import click - -from unstructured.ingest.v2.cli.cmds import dest, src - - -@click.group() -def ingest(): - pass - - -def get_cmd() -> click.Command: - """Construct and return a Click command object representing the main command for the CLI. - - This function adds all dest_subcommand(s) to each src_subcommand, and adds all of those - to the main command as nested subcommands. 
- """ - cmd = ingest - # Add all subcommands - for src_subcommand in src: - # Add all destination subcommands - for dest_subcommand in dest: - src_subcommand.add_command(dest_subcommand) - cmd.add_command(src_subcommand) - return cmd diff --git a/unstructured/ingest/v2/cli/cmds/__init__.py b/unstructured/ingest/v2/cli/cmds/__init__.py deleted file mode 100644 index 4a4a74c5d..000000000 --- a/unstructured/ingest/v2/cli/cmds/__init__.py +++ /dev/null @@ -1,87 +0,0 @@ -from collections import Counter - -import click - -from .astradb import astradb_dest_cmd -from .azure_cognitive_search import azure_cognitive_search_dest_cmd -from .chroma import chroma_dest_cmd -from .databricks_volumes import databricks_volumes_dest_cmd -from .elasticsearch import elasticsearch_dest_cmd, elasticsearch_src_cmd -from .fsspec.azure import azure_dest_cmd, azure_src_cmd -from .fsspec.box import box_dest_cmd, box_src_cmd -from .fsspec.dropbox import dropbox_dest_cmd, dropbox_src_cmd -from .fsspec.gcs import gcs_dest_cmd, gcs_src_cmd -from .fsspec.s3 import s3_dest_cmd, s3_src_cmd -from .fsspec.sftp import sftp_dest_cmd, sftp_src_cmd -from .google_drive import google_drive_src_cmd -from .local import local_dest_cmd, local_src_cmd -from .mongodb import mongodb_dest_cmd -from .onedrive import onedrive_drive_src_cmd -from .opensearch import opensearch_dest_cmd, opensearch_src_cmd -from .pinecone import pinecone_dest_cmd -from .salesforce import salesforce_src_cmd -from .sharepoint import sharepoint_drive_src_cmd -from .singlestore import singlestore_dest_cmd -from .sql import sql_dest_cmd -from .weaviate import weaviate_dest_cmd - -src_cmds = [ - azure_src_cmd, - box_src_cmd, - dropbox_src_cmd, - elasticsearch_src_cmd, - gcs_src_cmd, - google_drive_src_cmd, - local_src_cmd, - onedrive_drive_src_cmd, - opensearch_src_cmd, - s3_src_cmd, - salesforce_src_cmd, - sharepoint_drive_src_cmd, - sftp_src_cmd, -] -duplicate_src_names = [ - name for name, count in Counter([s.cmd_name for s in src_cmds]).items() if count > 1 -] -if duplicate_src_names: - raise ValueError( - "the following source cmd names were reused, all must be unique: {}".format( - ", ".join(duplicate_src_names) - ) - ) - -dest_cmds = [ - astradb_dest_cmd, - azure_cognitive_search_dest_cmd, - azure_dest_cmd, - box_dest_cmd, - chroma_dest_cmd, - dropbox_dest_cmd, - elasticsearch_dest_cmd, - gcs_dest_cmd, - local_dest_cmd, - opensearch_dest_cmd, - pinecone_dest_cmd, - s3_dest_cmd, - sftp_dest_cmd, - singlestore_dest_cmd, - weaviate_dest_cmd, - mongodb_dest_cmd, - databricks_volumes_dest_cmd, - sql_dest_cmd, -] - -duplicate_dest_names = [ - name for name, count in Counter([d.cmd_name for d in dest_cmds]).items() if count > 1 -] -if duplicate_dest_names: - raise ValueError( - "the following dest cmd names were reused, all must be unique: {}".format( - ", ".join(duplicate_dest_names) - ) - ) - - -src: list[click.Group] = [v.get_cmd() for v in src_cmds] - -dest: list[click.Command] = [v.get_cmd() for v in dest_cmds] diff --git a/unstructured/ingest/v2/cli/cmds/astradb.py b/unstructured/ingest/v2/cli/cmds/astradb.py deleted file mode 100644 index 36de30f70..000000000 --- a/unstructured/ingest/v2/cli/cmds/astradb.py +++ /dev/null @@ -1,85 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Dict -from unstructured.ingest.v2.processes.connectors.astradb import CONNECTOR_TYPE - - -@dataclass -class 
AstraDBCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Astra DB Token with access to the database.", - envvar="ASTRA_DB_APPLICATION_TOKEN", - show_envvar=True, - ), - click.Option( - ["--api-endpoint"], - required=True, - type=str, - help="The API endpoint for the Astra DB.", - envvar="ASTRA_DB_API_ENDPOINT", - show_envvar=True, - ), - ] - return options - - -@dataclass -class AstraDBCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--collection-name"], - required=False, - type=str, - help="The name of the Astra DB collection. " - "Note that the collection name must only include letters, " - "numbers, and underscores.", - ), - click.Option( - ["--embedding-dimension"], - required=True, - default=384, - type=int, - help="The dimensionality of the embeddings", - ), - click.Option( - ["--namespace"], - required=False, - default=None, - type=str, - help="The Astra DB connection namespace.", - ), - click.Option( - ["--requested-indexing-policy"], - required=False, - default=None, - type=Dict(), - help="The indexing policy to use for the collection." - 'example: \'{"deny": ["metadata"]}\' ', - ), - click.Option( - ["--batch-size"], - default=20, - type=int, - help="Number of records per batch", - ), - ] - return options - - -astradb_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=AstraDBCliConnectionConfig, - uploader_config=AstraDBCliUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py b/unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py deleted file mode 100644 index 6097606e5..000000000 --- a/unstructured/ingest/v2/cli/cmds/azure_cognitive_search.py +++ /dev/null @@ -1,72 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.azure_cognitive_search import CONNECTOR_TYPE - - -@dataclass -class AzureCognitiveSearchCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--index"], - required=True, - type=str, - help="The name of the Azure AI (Cognitive) Search index to connect to.", - envvar="AZURE_SEARCH_INDEX", - show_envvar=True, - ), - click.Option( - ["--endpoint"], - required=True, - type=str, - help="The URL endpoint of an Azure AI (Cognitive) search service." - "In the form of https://{{service_name}}.search.windows.net", - envvar="AZURE_SEARCH_ENDPOINT", - show_envvar=True, - ), - click.Option( - ["--key"], - required=True, - type=str, - help="Credential that is used for authenticating to an Azure service." 
- "(is an AzureKeyCredential)", - envvar="AZURE_SEARCH_API_KEY", - show_envvar=True, - ), - ] - return options - - -@dataclass -class AzureCognitiveSearchCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ), - ] - return options - - -@dataclass -class AzureCognitiveSearchCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -azure_cognitive_search_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=AzureCognitiveSearchCliConnectionConfig, - uploader_config=AzureCognitiveSearchCliUploaderConfig, - upload_stager_config=AzureCognitiveSearchCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/chroma.py b/unstructured/ingest/v2/cli/cmds/chroma.py deleted file mode 100644 index c13816351..000000000 --- a/unstructured/ingest/v2/cli/cmds/chroma.py +++ /dev/null @@ -1,108 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import Dict -from unstructured.ingest.v2.processes.connectors.chroma import CONNECTOR_TYPE - - -@dataclass -class ChromaCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--path"], - required=False, - type=str, - help="Location where Chroma is persisted," "if not connecting via http.", - ), - click.Option( - ["--settings"], - required=False, - type=Dict(), - help="A dictionary of settings to communicate with the chroma server." - 'example: \'{"persist_directory":"./chroma-persist"}\' ', - ), - click.Option( - ["--tenant"], - required=False, - default="default_tenant", - type=str, - help="The tenant to use for this client. Chroma defaults to 'default_tenant'.", - ), - click.Option( - ["--database"], - required=False, - default="default_database", - type=str, - help="The database to use for this client." - "Chroma defaults to 'default_database'.", - ), - click.Option( - ["--host"], - required=False, - type=str, - help="The hostname of the Chroma server.", - ), - click.Option( - ["--port"], - required=False, - type=int, - help="The port of the Chroma server.", - ), - click.Option( - ["--ssl"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to use SSL to connect to the Chroma server.", - ), - click.Option( - ["--headers"], - required=False, - type=Dict(), - help="A dictionary of headers to send to the Chroma server." 
- 'example: \'{"Authorization":"Basic()"}\' ', - ), - click.Option( - ["--collection-name"], - required=True, - type=str, - help="The name of the Chroma collection to write into.", - ), - ] - return options - - -@dataclass -class ChromaCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class ChromaCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -chroma_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=ChromaCliConnectionConfig, - uploader_config=ChromaCliUploaderConfig, - upload_stager_config=ChromaCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/databricks_volumes.py b/unstructured/ingest/v2/cli/cmds/databricks_volumes.py deleted file mode 100644 index e8f8e2486..000000000 --- a/unstructured/ingest/v2/cli/cmds/databricks_volumes.py +++ /dev/null @@ -1,161 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.databricks_volumes import CONNECTOR_TYPE - - -@dataclass -class DatabricksVolumesCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--host"], - type=str, - default=None, - help="The Databricks host URL for either the " - "Databricks workspace endpoint or the " - "Databricks accounts endpoint.", - ), - click.Option( - ["--account-id"], - type=str, - default=None, - help="The Databricks account ID for the Databricks " - "accounts endpoint. Only has effect when Host is " - "either https://accounts.cloud.databricks.com/ (AWS), " - "https://accounts.azuredatabricks.net/ (Azure), " - "or https://accounts.gcp.databricks.com/ (GCP).", - ), - click.Option( - ["--username"], - type=str, - default=None, - help="The Databricks username part of basic authentication. " - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option( - ["--password"], - type=str, - default=None, - help="The Databricks password part of basic authentication. " - "Only possible when Host is *.cloud.databricks.com (AWS).", - ), - click.Option(["--client-id"], type=str, default=None), - click.Option(["--client-secret"], type=str, default=None), - click.Option( - ["--token"], - type=str, - default=None, - help="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or " - "Azure Active Directory (Azure AD) token (Azure).", - ), - click.Option( - ["--azure-workspace-resource-id"], - type=str, - default=None, - help="The Azure Resource Manager ID for the Azure Databricks workspace, " - "which is exchanged for a Databricks host URL.", - ), - click.Option( - ["--azure-client-secret"], - type=str, - default=None, - help="The Azure AD service principal’s client secret.", - ), - click.Option( - ["--azure-client-id"], - type=str, - default=None, - help="The Azure AD service principal’s application ID.", - ), - click.Option( - ["--azure-tenant-id"], - type=str, - default=None, - help="The Azure AD service principal’s tenant ID.", - ), - click.Option( - ["--azure-environment"], - type=str, - default=None, - help="The Azure environment type (such as Public, UsGov, China, and Germany) for a " - "specific set of API endpoints. 
Defaults to PUBLIC.", - ), - click.Option( - ["--auth-type"], - type=str, - default=None, - help="When multiple auth attributes are available in the " - "environment, use the auth type specified by this " - "argument. This argument also holds the currently " - "selected auth.", - ), - click.Option(["--cluster-id"], type=str, default=None), - click.Option(["--google-credentials"], type=str, default=None), - click.Option(["--google-service-account"], type=str, default=None), - ] - return options - - -@dataclass -class DatabricksVolumesCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--volume"], type=str, required=True, help="Name of volume in the Unity Catalog" - ), - click.Option( - ["--catalog"], - type=str, - required=True, - help="Name of the catalog in the Databricks Unity Catalog service", - ), - click.Option( - ["--volume-path"], - type=str, - required=False, - default=None, - help="Optional path within the volume to write to", - ), - click.Option( - ["--overwrite"], - type=bool, - is_flag=True, - help="If true, an existing file will be overwritten.", - ), - click.Option( - ["--encoding"], - type=str, - required=True, - default="utf-8", - help="Encoding applied to the data when written to the volume", - ), - click.Option( - ["--schema"], - type=str, - required=True, - default="default", - help="Schema associated with the volume to write to in the Unity Catalog service", - ), - ] - return options - - -@dataclass -class DatabricksVolumesCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -databricks_volumes_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=DatabricksVolumesCliConnectionConfig, - uploader_config=DatabricksVolumesCliUploaderConfig, - upload_stager_config=DatabricksVolumesCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/elasticsearch.py b/unstructured/ingest/v2/cli/cmds/elasticsearch.py deleted file mode 100644 index 8c52c97f7..000000000 --- a/unstructured/ingest/v2/cli/cmds/elasticsearch.py +++ /dev/null @@ -1,159 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.elasticsearch import CONNECTOR_TYPE - - -@dataclass -class ElasticsearchCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the Elasticsearch hosts to connect to, e.g. 
"http://localhost:9200"', - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth or connecting to a cloud instance", - ), - click.Option( - ["--cloud-id"], type=str, default=None, help="id used to connect to Elastic Cloud" - ), - click.Option( - ["--es-api-key"], type=str, default=None, help="api key used for authentication" - ), - click.Option( - ["--api-key-id"], - type=str, - default=None, - help="id associated with api key used for authentication: " - "https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html", # noqa: E501 - # noqa: E501 - ), - click.Option( - ["--bearer-auth"], - type=str, - default=None, - help="bearer token used for HTTP bearer authentication", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - ), - click.Option( - ["--ssl-assert-fingerprint"], - type=str, - default=None, - help="SHA256 fingerprint value", - ), - ] - return options - - -@dataclass -class ElasticsearchCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - click.Option( - ["--fields"], - type=DelimitedString(), - default=[], - help="If provided, will limit the fields returned by Elasticsearch " - "to this comma-delimited list", - ), - ] - return options - - -@dataclass -class ElasticsearchCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the Elasticsearch index to pull data from, or upload data to.", - ), - click.Option( - ["--batch-size"], - default=100, - type=click.IntRange(0), - help="how many records to read at a time per process", - ), - ] - return options - - -@dataclass -class ElasticsearchCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the Elasticsearch index to pull data from, or upload data to.", - ), - ] - return options - - -@dataclass -class ElasticsearchUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size-bytes"], - required=False, - default=15_000_000, - type=int, - help="Size limit (in bytes) for each batch of items to be uploaded. 
Check" - " https://www.elastic.co/guide/en/elasticsearch/guide/current/bulk.html" - "#_how_big_is_too_big for more information.", - ), - click.Option( - ["--num-threads"], - required=False, - default=1, - type=int, - help="Number of threads to be used while uploading content", - ), - ] - return options - - -elasticsearch_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=ElasticsearchCliConnectionConfig, - indexer_config=ElasticsearchCliIndexerConfig, - downloader_config=ElasticsearchCliDownloadConfig, -) - -elasticsearch_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=ElasticsearchCliConnectionConfig, - upload_stager_config=ElasticsearchCliUploadStagerConfig, - uploader_config=ElasticsearchUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/__init__.py b/unstructured/ingest/v2/cli/cmds/fsspec/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/azure.py b/unstructured/ingest/v2/cli/cmds/fsspec/azure.py deleted file mode 100644 index c5bdd2ab3..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/azure.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.azure import ( - CONNECTOR_TYPE, -) - - -@dataclass -class AzureCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class AzureCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class AzureCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--account-key"], - default=None, - help="The storage account key. This is used for shared key " - "authentication. If any of account key, sas token or " - "client_id are not specified, anonymous access will be used.", - ), - click.Option( - ["--account-name"], - default=None, - help="The storage account name. This is used to authenticate " - "requests signed with an account key and to construct " - "the storage endpoint. It is required unless a connection " - "string is given, or if a custom domain is used with " - "anonymous authentication.", - ), - click.Option( - ["--connection-string"], - default=None, - help="If specified, this will override all other parameters. See " - "http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/ " # noqa: E501 - "for the connection string format.", - ), - click.Option( - ["--sas_token"], - default=None, - help="A shared access signature token to use to authenticate " - "requests instead of the account key. If account key and " - "sas token are both specified, account key will be used " - "to sign. 
If any of account key, sas token or client_id " - "are not specified, anonymous access will be used.", - ), - ] - return options - - -@dataclass -class AzureUploaderConfig(FsspecCliUploaderConfig): - pass - - -azure_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=AzureCliIndexerConfig, - connection_config=AzureCliConnectionConfig, - downloader_config=AzureCliDownloadConfig, -) - -azure_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=AzureCliConnectionConfig, - uploader_config=AzureUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/box.py b/unstructured/ingest/v2/cli/cmds/fsspec/box.py deleted file mode 100644 index 99241b917..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/box.py +++ /dev/null @@ -1,58 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.box import ( - CONNECTOR_TYPE, -) - - -@dataclass -class BoxCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class BoxCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class BoxCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--box-app-config"], - default=None, - type=click.Path(), - help="Path to Box app credentials as json file.", - ), - ] - return options - - -@dataclass -class BoxUploaderConfig(FsspecCliUploaderConfig): - pass - - -box_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=BoxCliIndexerConfig, - connection_config=BoxCliConnectionConfig, - downloader_config=BoxCliDownloadConfig, -) - -box_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=BoxCliConnectionConfig, - uploader_config=BoxUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py b/unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py deleted file mode 100644 index 7b7c4406d..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/dropbox.py +++ /dev/null @@ -1,58 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.dropbox import ( - CONNECTOR_TYPE, -) - - -@dataclass -class DropboxCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class DropboxCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class DropboxCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--token"], - required=True, - type=str, - help="Dropbox access token.", - ), - ] - return options - - -@dataclass -class DropboxUploaderConfig(FsspecCliUploaderConfig): - pass - - -dropbox_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=DropboxCliIndexerConfig, - connection_config=DropboxCliConnectionConfig, - downloader_config=DropboxCliDownloadConfig, -) - -dropbox_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=DropboxCliConnectionConfig, - uploader_config=DropboxUploaderConfig, -) diff --git 
a/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py b/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py deleted file mode 100644 index 858586c76..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/fsspec.py +++ /dev/null @@ -1,77 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString - - -@dataclass -class FsspecCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - - -@dataclass -class FsspecCliFileConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [ - click.Option( - ["--remote-url"], - required=True, - help="Remote fsspec URL formatted as `protocol://dir/path`", - ) - ] - - -@dataclass -class FsspecCliUploaderConfig(FsspecCliFileConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = super(FsspecCliUploaderConfig, FsspecCliUploaderConfig).get_cli_options() - options.extend( - [ - click.Option( - ["--overwrite"], - is_flag=True, - default=False, - show_default=True, - help="If set, will overwrite content if content already exists", - ) - ] - ) - return options - - -@dataclass -class FsspecCliIndexerConfig(FsspecCliFileConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = super(FsspecCliIndexerConfig, FsspecCliIndexerConfig).get_cli_options() - options.extend( - [ - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are accepted, e.g. '*.html,*.txt'", - ), - ] - ) - return options diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/gcs.py b/unstructured/ingest/v2/cli/cmds/fsspec/gcs.py deleted file mode 100644 index 7464d7769..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/gcs.py +++ /dev/null @@ -1,81 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import FileOrJson -from unstructured.ingest.v2.processes.connectors.fsspec.gcs import ( - CONNECTOR_TYPE, -) - - -@dataclass -class GcsCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class GcsCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class GcsCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - help_string = """ - Options: - - ``None``, GCSFS will attempt to guess your credentials in the - following order: gcloud CLI default, gcsfs cached token, google compute - metadata service, anonymous. - - ``'google_default'``, your default gcloud credentials will be used, - which are typically established by doing ``gcloud login`` in a terminal. 
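`fsspec.py` above extends a parent config's option list from inside a `@staticmethod` using the two-argument `super(Cls, Cls)` form, which is needed because zero-argument `super()` cannot resolve without an instance argument in the calling frame. A stripped-down illustration:

```python
import click


class BaseConfig:
    @staticmethod
    def get_cli_options() -> list[click.Option]:
        return [click.Option(["--remote-url"], required=True)]


class UploaderConfig(BaseConfig):
    @staticmethod
    def get_cli_options() -> list[click.Option]:
        # Zero-argument super() fails inside a @staticmethod, so the explicit
        # two-argument form is used to reach the parent's static method.
        options = super(UploaderConfig, UploaderConfig).get_cli_options()
        options.append(click.Option(["--overwrite"], is_flag=True, default=False))
        return options


assert [o.name for o in UploaderConfig.get_cli_options()] == ["remote_url", "overwrite"]
```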
- - ``'cache'``, credentials from previously successful gcsfs - authentication will be used (use this after "browser" auth succeeded) - - ``'anon'``, no authentication is performed, and you can only - access data which is accessible to allUsers (in this case, the project and - access level parameters are meaningless) - - ``'browser'``, you get an access code with which you can - authenticate via a specially provided URL - - if ``'cloud'``, we assume we are running within google compute - or google container engine, and query the internal metadata directly for - a token. - - you may supply a token generated by the - [gcloud](https://cloud.google.com/sdk/docs/) - utility; this is either a python dictionary or the name of a file - containing the JSON returned by logging in with the gcloud CLI tool. - """ - options = [ - click.Option( - ["--service-account-key"], - default=None, - type=FileOrJson(allow_raw_str=True), - help=help_string, - ), - ] - return options - - -@dataclass -class GcsUploaderConfig(FsspecCliUploaderConfig): - pass - - -gcs_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=GcsCliIndexerConfig, - connection_config=GcsCliConnectionConfig, - downloader_config=GcsCliDownloadConfig, -) - -gcs_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=GcsCliConnectionConfig, - uploader_config=GcsUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/s3.py b/unstructured/ingest/v2/cli/cmds/fsspec/s3.py deleted file mode 100644 index 4af72d4d4..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/s3.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( - CONNECTOR_TYPE, -) - - -@dataclass -class S3CliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class S3CliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class S3CliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - help="Connect to s3 without local AWS credentials.", - ), - click.Option( - ["--endpoint-url"], - type=str, - default=None, - help="Use this endpoint_url, if specified. Needed for " - "connecting to non-AWS S3 buckets.", - ), - click.Option( - ["--key"], - type=str, - default=None, - help="If not anonymous, use this access key ID, if specified. 
Takes precedence " - "over `aws_access_key_id` in client_kwargs.", - ), - click.Option( - ["--secret"], - type=str, - default=None, - help="If not anonymous, use this secret access key, if specified.", - ), - click.Option( - ["--token"], - type=str, - default=None, - help="If not anonymous, use this security token, if specified.", - ), - ] - return options - - -@dataclass -class S3UploaderConfig(FsspecCliUploaderConfig): - pass - - -s3_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=S3CliIndexerConfig, - connection_config=S3CliConnectionConfig, - downloader_config=S3CliDownloadConfig, -) - -s3_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=S3CliConnectionConfig, - uploader_config=S3UploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/fsspec/sftp.py b/unstructured/ingest/v2/cli/cmds/fsspec/sftp.py deleted file mode 100644 index b4bfcb6c8..000000000 --- a/unstructured/ingest/v2/cli/cmds/fsspec/sftp.py +++ /dev/null @@ -1,80 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.fsspec.fsspec import ( - FsspecCliDownloadConfig, - FsspecCliIndexerConfig, - FsspecCliUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.fsspec.sftp import ( - CONNECTOR_TYPE, -) - - -@dataclass -class SftpCliDownloadConfig(FsspecCliDownloadConfig): - pass - - -@dataclass -class SftpCliIndexerConfig(FsspecCliIndexerConfig): - pass - - -@dataclass -class SftpCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Username for sftp connection", - ), - click.Option( - ["--password"], - required=True, - type=str, - help="Password for sftp connection", - ), - click.Option( - ["--look-for-keys"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to search for private key files in ~/.ssh/", - ), - click.Option( - ["--allow-agent"], - required=False, - default=False, - is_flag=True, - type=bool, - help="Whether to connect to the SSH agent.", - ), - ] - return options - - -@dataclass -class SftpUploaderConfig(FsspecCliUploaderConfig): - pass - - -sftp_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=SftpCliIndexerConfig, - connection_config=SftpCliConnectionConfig, - downloader_config=SftpCliDownloadConfig, -) - -sftp_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SftpCliConnectionConfig, - uploader_config=SftpUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/google_drive.py b/unstructured/ingest/v2/cli/cmds/google_drive.py deleted file mode 100644 index 2a8d7960c..000000000 --- a/unstructured/ingest/v2/cli/cmds/google_drive.py +++ /dev/null @@ -1,74 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString, FileOrJson -from unstructured.ingest.v2.processes.connectors.google_drive import CONNECTOR_TYPE - - -@dataclass -class GoogleDriveCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--drive-id"], - required=True, - type=str, - help="Google Drive File or Folder ID.", - ), - click.Option( - ["--service-account-key"], - required=True, - 
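The S3 connection options (`--anonymous`, `--endpoint-url`, `--key`, `--secret`, `--token`) correspond closely to `s3fs.S3FileSystem` keyword arguments. A sketch of that mapping, assuming `s3fs` is installed; routing `endpoint_url` through `client_kwargs` is this sketch's choice, not necessarily the connector's:

```python
from typing import Optional

import s3fs


def make_filesystem(
    anonymous: bool = False,
    endpoint_url: Optional[str] = None,
    key: Optional[str] = None,
    secret: Optional[str] = None,
    token: Optional[str] = None,
) -> s3fs.S3FileSystem:
    client_kwargs = {"endpoint_url": endpoint_url} if endpoint_url else {}
    return s3fs.S3FileSystem(
        anon=anonymous, key=key, secret=secret, token=token, client_kwargs=client_kwargs
    )


fs = make_filesystem(anonymous=True)  # public buckets need no local AWS credentials
# fs.ls("some-public-bucket/path")
```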
type=FileOrJson(), - help="Either the file path of the credentials file to use or a json string of " - "those values to use for authentication", - ), - ] - return options - - -@dataclass -class GoogleDriveCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--extensions"], - default=None, - type=DelimitedString(), - help="Filters the files to be processed based on extension e.g. jpg, docx, etc.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -@dataclass -class GoogleDriveCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -google_drive_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=GoogleDriveCliConnectionConfig, - indexer_config=GoogleDriveCliIndexerConfig, - downloader_config=GoogleDriveCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/local.py b/unstructured/ingest/v2/cli/cmds/local.py deleted file mode 100644 index f9ab17308..000000000 --- a/unstructured/ingest/v2/cli/cmds/local.py +++ /dev/null @@ -1,60 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.local import CONNECTOR_TYPE - - -@dataclass -class LocalCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--input-path"], - required=True, - type=click.Path(file_okay=True, dir_okay=True, exists=True), - help="Path to the location in the local file system that will be processed.", - ), - click.Option( - ["--file-glob"], - default=None, - type=DelimitedString(), - help="A comma-separated list of file globs to limit which types of " - "local files are accepted, e.g. 
'*.html,*.txt'", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -@dataclass -class LocalCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--output-dir"], - required=True, - type=str, - help="Local path to write partitioned output to", - ) - ] - return options - - -local_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - indexer_config=LocalCliIndexerConfig, -) - -local_dest_cmd = DestCmd(cmd_name=CONNECTOR_TYPE, uploader_config=LocalCliUploaderConfig) diff --git a/unstructured/ingest/v2/cli/cmds/mongodb.py b/unstructured/ingest/v2/cli/cmds/mongodb.py deleted file mode 100644 index 49ad3e53d..000000000 --- a/unstructured/ingest/v2/cli/cmds/mongodb.py +++ /dev/null @@ -1,62 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.mongodb import CONNECTOR_TYPE - - -@dataclass -class MongoDBCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--uri"], - help="URI to user when connecting", - ), - click.Option( - ["--host"], - help="hostname or IP address or Unix domain socket path of a single mongod or " - "mongos instance to connect to, or a list of hostnames", - ), - click.Option(["--port"], type=int, default=27017), - click.Option( - ["--database"], type=str, required=True, help="database name to connect to" - ), - click.Option( - ["--collection"], required=True, type=str, help="collection name to connect to" - ), - ] - return options - - -@dataclass -class MongoDBCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class MongoDBCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -mongodb_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=MongoDBCliConnectionConfig, - uploader_config=MongoDBCliUploaderConfig, - upload_stager_config=MongoDBCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/onedrive.py b/unstructured/ingest/v2/cli/cmds/onedrive.py deleted file mode 100644 index d9bc7df2c..000000000 --- a/unstructured/ingest/v2/cli/cmds/onedrive.py +++ /dev/null @@ -1,91 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.onedrive import CONNECTOR_TYPE - - -@dataclass -class OnedriveCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--client-id"], - required=True, - type=str, - help="Microsoft app client ID", - ), - click.Option( - ["--client-cred"], - required=True, - type=str, - help="Microsoft App client secret", - ), - click.Option( - ["--user-pname"], - required=True, - type=str, - help="User principal name, usually is your Azure AD email.", - ), - click.Option( - ["--tenant"], - default="common", - type=str, - help="ID or domain name associated with your Azure AD 
instance", - ), - click.Option( - ["--authority-url"], - default="https://login.microsoftonline.com", - type=str, - help="Authentication token provider for Microsoft apps, default is " - "https://login.microsoftonline.com", - ), - ] - return options - - -@dataclass -class OnedriveCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--path"], - default=None, - type=str, - help="Folder to start parsing files from.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - ] - return options - - -@dataclass -class OnedriveCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -onedrive_drive_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=OnedriveCliConnectionConfig, - indexer_config=OnedriveCliIndexerConfig, - downloader_config=OnedriveCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/opensearch.py b/unstructured/ingest/v2/cli/cmds/opensearch.py deleted file mode 100644 index 8d93b7be3..000000000 --- a/unstructured/ingest/v2/cli/cmds/opensearch.py +++ /dev/null @@ -1,93 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd, SrcCmd -from unstructured.ingest.v2.cli.cmds.elasticsearch import ( - ElasticsearchCliDownloadConfig, - ElasticsearchCliIndexerConfig, - ElasticsearchCliUploadStagerConfig, - ElasticsearchUploaderConfig, -) -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.opensearch import CONNECTOR_TYPE - - -@dataclass -class OpenSearchCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--hosts"], - type=DelimitedString(), - help='List of the OpenSearch hosts to connect to, e.g. 
"http://localhost:9200"', - ), - click.Option( - ["--username"], type=str, default=None, help="username when using basic auth" - ), - click.Option( - ["--password"], - type=str, - default=None, - help="password when using basic auth", - ), - click.Option( - ["--use-ssl"], - type=bool, - default=False, - is_flag=True, - help="use ssl for the connection", - ), - click.Option( - ["--verify-certs"], - type=bool, - default=False, - is_flag=True, - help="whether to verify SSL certificates", - ), - click.Option( - ["--ssl-show-warn"], - type=bool, - default=False, - is_flag=True, - help="show warning when verify certs is disabled", - ), - click.Option( - ["--ca-certs"], - type=click.Path(), - default=None, - help="path to CA bundle", - ), - click.Option( - ["--client-cert"], - type=click.Path(), - default=None, - help="path to the file containing the private key and the certificate," - " or cert only if using client_key", - ), - click.Option( - ["--client-key"], - type=click.Path(), - default=None, - help="path to the file containing the private key" - " if using separate cert and key files", - ), - ] - return options - - -opensearch_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=OpenSearchCliConnectionConfig, - indexer_config=ElasticsearchCliIndexerConfig, - downloader_config=ElasticsearchCliDownloadConfig, -) - -opensearch_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=OpenSearchCliConnectionConfig, - upload_stager_config=ElasticsearchCliUploadStagerConfig, - uploader_config=ElasticsearchUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/pinecone.py b/unstructured/ingest/v2/cli/cmds/pinecone.py deleted file mode 100644 index 010cc703c..000000000 --- a/unstructured/ingest/v2/cli/cmds/pinecone.py +++ /dev/null @@ -1,62 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.pinecone import CONNECTOR_TYPE - - -@dataclass -class PineconeCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--api-key"], - required=True, - type=str, - help="API key for Pinecone.", - ), - click.Option( - ["--index-name"], - required=True, - type=str, - help="Name of the index to connect to. Example: my-index", - ), - click.Option( - ["--environment"], - required=True, - type=str, - help="Environment to connect to. 
Example: us-east-1", - ), - ] - return options - - -@dataclass -class PineconeCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ), - click.Option( - ["--num-processes"], - default=4, - type=int, - help="Number of processes to use for uploading", - ), - ] - return options - - -pinecone_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=PineconeCliConnectionConfig, - uploader_config=PineconeCliUploaderConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/salesforce.py b/unstructured/ingest/v2/cli/cmds/salesforce.py deleted file mode 100644 index ac910b546..000000000 --- a/unstructured/ingest/v2/cli/cmds/salesforce.py +++ /dev/null @@ -1,79 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.salesforce import ( - ACCEPTED_CATEGORIES, - CONNECTOR_TYPE, -) - - -@dataclass -class SalesforceCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--username"], - required=True, - type=str, - help="Salesforce username usually looks like an email.", - ), - click.Option( - ["--consumer-key"], - required=True, - type=str, - help="For the Salesforce JWT auth. Found in Consumer Details.", - ), - click.Option( - ["--private-key"], - required=True, - type=str, - help="Path to the private key or its contents for the Salesforce JWT auth. " - "Key file is usually named server.key.", - ), - ] - return options - - -@dataclass -class SalesforceCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - possible_categories = ACCEPTED_CATEGORIES - options = [ - click.Option( - ["--categories"], - default=None, - required=True, - type=DelimitedString(choices=possible_categories), - help="Comma-delimited salesforce categories to download. 
" - "Currently only {}.".format(", ".join(possible_categories)), - ), - ] - return options - - -@dataclass -class SalesforceCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -salesforce_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SalesforceCliConnectionConfig, - indexer_config=SalesforceCliIndexerConfig, - downloader_config=SalesforceCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/sharepoint.py b/unstructured/ingest/v2/cli/cmds/sharepoint.py deleted file mode 100644 index 27d5cf3ed..000000000 --- a/unstructured/ingest/v2/cli/cmds/sharepoint.py +++ /dev/null @@ -1,112 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import SrcCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.sharepoint import CONNECTOR_TYPE - - -@dataclass -class SharepointCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--client-id"], - default=None, - type=str, - help="Sharepoint app client ID", - ), - click.Option( - ["--client-cred"], - default=None, - type=str, - help="Sharepoint app secret", - ), - click.Option( - ["--site"], - default=None, - type=str, - help="Sharepoint site url. Process either base url e.g \ - https://[tenant].sharepoint.com or relative sites \ - https://[tenant].sharepoint.com/sites/. \ - To process all sites within the tenant pass a site url as \ - https://[tenant]-admin.sharepoint.com.\ - This requires the app to be registered at a tenant level", - ), - click.Option( - ["--permissions-application-id"], - type=str, - help="Microsoft Graph API application id", - ), - click.Option( - ["--permissions-client-cred"], - type=str, - help="Microsoft Graph API application credentials", - ), - click.Option( - ["--permissions-tenant"], - type=str, - help="e.g https://contoso.onmicrosoft.com to get permissions data within tenant.", - ), - ] - return options - - -@dataclass -class SharepointCliIndexerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--path"], - default=None, - type=str, - help="Path from which to start parsing files. 
If the connector is to \ - process all sites within the tenant this filter will be applied to \ - all sites document libraries.", - ), - click.Option( - ["--recursive"], - is_flag=True, - default=False, - help="Recursively download files in their respective folders " - "otherwise stop at the files in provided folder level.", - ), - click.Option( - ["--omit-files"], - is_flag=True, - default=False, - help="Don't process files.", - ), - click.Option( - ["--omit-pages"], - is_flag=True, - default=False, - help="Don't process site pages.", - ), - ] - return options - - -@dataclass -class SharepointCliDownloadConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--download-dir"], - help="Where files are downloaded to, defaults to a location at" - "`$HOME/.cache/unstructured/ingest//`.", - ), - ] - return options - - -sharepoint_drive_src_cmd = SrcCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SharepointCliConnectionConfig, - indexer_config=SharepointCliIndexerConfig, - downloader_config=SharepointCliDownloadConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/singlestore.py b/unstructured/ingest/v2/cli/cmds/singlestore.py deleted file mode 100644 index 1b7809d09..000000000 --- a/unstructured/ingest/v2/cli/cmds/singlestore.py +++ /dev/null @@ -1,96 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.singlestore import CONNECTOR_TYPE - - -@dataclass -class SingleStoreCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--host"], - required=False, - type=str, - default=None, - help="SingleStore host", - ), - click.Option( - ["--port"], - required=False, - type=int, - default=None, - help="SingleStore port", - ), - click.Option( - ["--user"], - required=False, - type=str, - default=None, - help="SingleStore user", - ), - click.Option( - ["--password"], - required=False, - type=str, - default=None, - help="SingleStore password", - ), - click.Option( - ["--database"], - required=False, - type=str, - default=None, - help="SingleStore database", - ), - ] - return options - - -@dataclass -class SingleStoreCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--drop-empty-cols"], - required=False, - type=bool, - is_flag=True, - default=False, - help="Drop any columns that have no data", - ), - ] - return options - - -@dataclass -class SingleStoreCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [ - click.Option( - ["--table-name"], - required=False, - type=str, - help="SingleStore table to write contents to", - ), - click.Option( - ["--batch-size"], - required=False, - type=click.IntRange(min=1), - help="Batch size when writing to SingleStore", - ), - ] - - -singlestore_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SingleStoreCliConnectionConfig, - uploader_config=SingleStoreCliUploaderConfig, - upload_stager_config=SingleStoreCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/sql.py b/unstructured/ingest/v2/cli/cmds/sql.py deleted file mode 100644 index b36f3c3ac..000000000 --- a/unstructured/ingest/v2/cli/cmds/sql.py +++ /dev/null @@ -1,84 +0,0 @@ -from dataclasses import dataclass - -import click - -from 
unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.processes.connectors.sql import CONNECTOR_TYPE - -SQL_DRIVERS = {"postgresql", "sqlite"} - - -@dataclass -class SQLCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--db-type"], - required=True, - type=click.Choice(SQL_DRIVERS), - help="Type of the database backend", - ), - click.Option( - ["--username"], - default=None, - type=str, - help="DB username", - ), - click.Option( - ["--password"], - default=None, - type=str, - help="DB password", - ), - click.Option( - ["--host"], - default=None, - type=str, - help="DB host", - ), - click.Option( - ["--port"], - default=None, - type=int, - help="DB host connection port", - ), - click.Option( - ["--database"], - default=None, - type=str, - help="Database name. For sqlite databases, this is the path to the .db file.", - ), - ] - return options - - -@dataclass -class SQLCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class SQLCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -sql_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=SQLCliConnectionConfig, - uploader_config=SQLCliUploaderConfig, - upload_stager_config=SQLCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/cmds/weaviate.py b/unstructured/ingest/v2/cli/cmds/weaviate.py deleted file mode 100644 index aaa051d05..000000000 --- a/unstructured/ingest/v2/cli/cmds/weaviate.py +++ /dev/null @@ -1,100 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.base import DestCmd -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString -from unstructured.ingest.v2.processes.connectors.weaviate import CONNECTOR_TYPE - - -@dataclass -class WeaviateCliConnectionConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--host-url"], - required=True, - help="Weaviate instance url", - ), - click.Option( - ["--class-name"], - default=None, - type=str, - help="Name of the class to push the records into, e.g: Pdf-elements", - ), - click.Option( - ["--access-token"], default=None, type=str, help="Used to create the bearer token." - ), - click.Option( - ["--refresh-token"], - default=None, - type=str, - help="Will tie this value to the bearer token. 
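`sql.py` restricts `--db-type` to `postgresql` or `sqlite`, with `--database` doubling as the `.db` file path in the sqlite case. A sketch of the dispatch this implies; the `psycopg2` parameter names are the library's, but the helper itself is hypothetical:

```python
import sqlite3
from typing import Optional


def connect(
    db_type: str,
    database: str,
    username: Optional[str] = None,
    password: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
):
    if db_type == "sqlite":
        # For sqlite, --database is the path to the .db file.
        return sqlite3.connect(database)
    if db_type == "postgresql":
        import psycopg2  # assumed installed alongside the postgres extra

        return psycopg2.connect(
            dbname=database, user=username, password=password, host=host, port=port
        )
    raise ValueError(f"unsupported db type: {db_type}")
```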
If not provided, " - "the authentication will expire once the lifetime of the access token is up.", - ), - click.Option( - ["--api-key"], - default=None, - type=str, - ), - click.Option( - ["--client-secret"], - default=None, - type=str, - ), - click.Option( - ["--scope"], - default=None, - type=DelimitedString(), - ), - click.Option( - ["--username"], - default=None, - type=str, - ), - click.Option( - ["--password"], - default=None, - type=str, - ), - click.Option( - ["--anonymous"], - is_flag=True, - default=False, - type=bool, - help="if set, all auth values will be ignored", - ), - ] - return options - - -@dataclass -class WeaviateCliUploaderConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--batch-size"], - default=100, - type=int, - help="Number of records per batch", - ) - ] - return options - - -@dataclass -class WeaviateCliUploadStagerConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - return [] - - -weaviate_dest_cmd = DestCmd( - cmd_name=CONNECTOR_TYPE, - connection_config=WeaviateCliConnectionConfig, - uploader_config=WeaviateCliUploaderConfig, - upload_stager_config=WeaviateCliUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/cli/configs/__init__.py b/unstructured/ingest/v2/cli/configs/__init__.py deleted file mode 100644 index 2b3a42192..000000000 --- a/unstructured/ingest/v2/cli/configs/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .chunk import ChunkerCliConfig -from .embed import EmbedderCliConfig -from .partition import PartitionerCliConfig -from .processor import ProcessorCliConfig - -__all__ = ["ChunkerCliConfig", "ProcessorCliConfig", "PartitionerCliConfig", "EmbedderCliConfig"] diff --git a/unstructured/ingest/v2/cli/configs/chunk.py b/unstructured/ingest/v2/cli/configs/chunk.py deleted file mode 100644 index b6f79641d..000000000 --- a/unstructured/ingest/v2/cli/configs/chunk.py +++ /dev/null @@ -1,89 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.chunking import CHUNK_MAX_CHARS_DEFAULT, CHUNK_MULTI_PAGE_DEFAULT -from unstructured.ingest.v2.cli.interfaces import CliConfig - - -@dataclass -class ChunkerCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--chunking-strategy"], - type=str, - default=None, - help="The rule-set to use to form chunks. Omit to disable chunking.", - ), - click.Option( - ["--chunk-combine-text-under-n-chars"], - type=int, - help=( - "Combine consecutive chunks when the first does not exceed this length and" - " the second will fit without exceeding the hard-maximum length. Only" - " operative for 'by_title' chunking-strategy." - ), - ), - click.Option( - ["--chunk-include-orig-elements/--chunk-no-include-orig-elements"], - is_flag=True, - default=True, - help=( - "When chunking, add the original elements consolidated to form each chunk to" - " `.metadata.orig_elements` on that chunk." - ), - ), - click.Option( - ["--chunk-max-characters"], - type=int, - default=CHUNK_MAX_CHARS_DEFAULT, - show_default=True, - help=( - "Hard maximum chunk length. No chunk will exceed this length. An oversized" - " element will be divided by text-splitting to fit this window." - ), - ), - click.Option( - ["--chunk-multipage-sections/--chunk-no-multipage-sections"], - is_flag=True, - default=CHUNK_MULTI_PAGE_DEFAULT, - help=( - "Ignore page boundaries when chunking such that elements from two different" - " pages can appear in the same chunk. 
Only operative for 'by_title'" - " chunking-strategy." - ), - ), - click.Option( - ["--chunk-new-after-n-chars"], - type=int, - help=( - "Soft-maximum chunk length. Another element will not be added to a chunk of" - " this length even when it would fit without exceeding the hard-maximum" - " length." - ), - ), - click.Option( - ["--chunk-overlap"], - type=int, - default=0, - show_default=True, - help=( - "Prefix chunk text with last overlap=N characters of prior chunk. Only" - " applies to oversized chunks divided by text-splitting. To apply overlap to" - " non-oversized chunks use the --chunk-overlap-all option." - ), - ), - click.Option( - ["--chunk-overlap-all"], - is_flag=True, - default=False, - help=( - "Apply overlap to chunks formed from whole elements as well as those formed" - " by text-splitting oversized elements. Overlap length is taken from the" - " --chunk-overlap option value." - ), - ), - ] - return options diff --git a/unstructured/ingest/v2/cli/configs/embed.py b/unstructured/ingest/v2/cli/configs/embed.py deleted file mode 100644 index 69f6bc657..000000000 --- a/unstructured/ingest/v2/cli/configs/embed.py +++ /dev/null @@ -1,74 +0,0 @@ -from dataclasses import dataclass -from typing import Any - -import click -from dataclasses_json.core import Json - -from unstructured.embed import EMBEDDING_PROVIDER_TO_CLASS_MAP -from unstructured.ingest.v2.cli.interfaces import CliConfig - - -@dataclass -class EmbedderCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--embedding-provider"], - help="Type of the embedding class to be used. Can be one of: " - f"{list(EMBEDDING_PROVIDER_TO_CLASS_MAP)}", - type=click.Choice(list(EMBEDDING_PROVIDER_TO_CLASS_MAP)), - ), - click.Option( - ["--embedding-api-key"], - help="API key for the embedding model, for the case an API key is needed.", - type=str, - default=None, - ), - click.Option( - ["--embedding-model-name"], - help="Embedding model name, if needed. " - "Chooses a particular LLM between different options, to embed with it.", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-access-key-id"], - help="AWS access key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-secret-access-key"], - help="AWS secret key used for AWS-based embedders, such as bedrock", - type=str, - default=None, - ), - click.Option( - ["--embedding-aws-region"], - help="AWS region used for AWS-based embedders, such as bedrock", - type=str, - default="us-west-2", - ), - ] - return options - - @classmethod - def from_dict(cls, kvs: Json, **kwargs: Any): - """ - Extension of the dataclass from_dict() to avoid a naming conflict with other CLI params. 
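The chunker flags above mirror keyword arguments of unstructured's `chunk_by_title`; roughly, and only as an illustration of that mapping:

```python
from unstructured.chunking.title import chunk_by_title
from unstructured.partition.text import partition_text

elements = partition_text(text="First section.\n\nSecond section.")
chunks = chunk_by_title(
    elements,
    max_characters=500,       # --chunk-max-characters (hard maximum)
    new_after_n_chars=400,    # --chunk-new-after-n-chars (soft maximum)
    overlap=50,               # --chunk-overlap
    overlap_all=False,        # --chunk-overlap-all
    multipage_sections=True,  # --chunk-multipage-sections
)
```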
- This allows CLI arguments to be prepended with embedding_ during CLI invocation but - doesn't require that as part of the field names in this class - """ - if isinstance(kvs, dict): - new_kvs = { - k[len("embedding_") :]: v # noqa: E203 - for k, v in kvs.items() - if k.startswith("embedding_") - } - if len(new_kvs.keys()) == 0: - return None - if not new_kvs.get("provider"): - return None - return super().from_dict(new_kvs, **kwargs) - return super().from_dict(kvs, **kwargs) diff --git a/unstructured/ingest/v2/cli/configs/partition.py b/unstructured/ingest/v2/cli/configs/partition.py deleted file mode 100644 index 5ec5c0dbe..000000000 --- a/unstructured/ingest/v2/cli/configs/partition.py +++ /dev/null @@ -1,99 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.cli.utils import DelimitedString, Dict - - -@dataclass -class PartitionerCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--strategy"], - default="auto", - help="The method that will be used to process the documents. " - "Default: auto. Other strategies include `fast` and `hi_res`.", - ), - click.Option( - ["--ocr-languages"], - default=None, - type=DelimitedString(delimiter="+"), - help="A list of language packs to specify which languages to use for OCR, " - "separated by '+' e.g. 'eng+deu' to use the English and German language packs. " - "The appropriate Tesseract " - "language pack needs to be installed.", - ), - click.Option( - ["--encoding"], - default=None, - help="Text encoding to use when reading documents. By default the encoding is " - "detected automatically.", - ), - click.Option( - ["--skip-infer-table-types"], - type=DelimitedString(), - default=None, - help="Optional list of document types to skip table extraction on", - ), - click.Option( - ["--additional-partition-args"], - type=Dict(), - help="A json string representation of values to pass through to partition()", - ), - click.Option( - ["--fields-include"], - type=DelimitedString(), - default=["element_id", "text", "type", "metadata", "embeddings"], - help="Comma-delimited list. If set, include the specified top-level " - "fields in an element.", - ), - click.Option( - ["--flatten-metadata"], - is_flag=True, - default=False, - help="Results in flattened json elements. " - "Specifically, the metadata key values are brought to " - "the top-level of the element, and the `metadata` key itself is removed.", - ), - click.Option( - ["--metadata-include"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, include the specified metadata " - "fields if they exist and drop all other fields. ", - ), - click.Option( - ["--metadata-exclude"], - default=[], - type=DelimitedString(), - help="Comma-delimited list. If set, drop the specified metadata " - "fields if they exist.", - ), - click.Option( - ["--partition-by-api"], - is_flag=True, - default=False, - help="Use a remote API to partition the files." - " Otherwise, use the function from partition.auto", - ), - click.Option( - ["--partition-endpoint"], - default="https://api.unstructured.io/general/v0/general", - help="If partitioning via api, use the following host. 
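The `from_dict` override above keys off an `embedding_` prefix so that flat CLI kwargs land on the unprefixed dataclass fields, returning `None` when no provider was configured. The core transformation, isolated as a hypothetical helper:

```python
from typing import Optional


def strip_embedding_prefix(kvs: dict) -> Optional[dict]:
    """Keep only embedding_* keys, prefix removed; None when nothing is configured."""
    new_kvs = {
        k[len("embedding_"):]: v for k, v in kvs.items() if k.startswith("embedding_")
    }
    if not new_kvs or not new_kvs.get("provider"):
        return None
    return new_kvs


assert strip_embedding_prefix(
    {"embedding_provider": "openai", "embedding_api_key": "sk-test", "strategy": "auto"}
) == {"provider": "openai", "api_key": "sk-test"}
```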
" - "Default: https://api.unstructured.io/general/v0/general", - ), - click.Option( - ["--api-key"], - default=None, - help="API Key for partition endpoint.", - ), - click.Option( - ["--hi-res-model-name"], - default=None, - help="Model name for hi-res strategy.", - ), - ] - return options diff --git a/unstructured/ingest/v2/cli/configs/processor.py b/unstructured/ingest/v2/cli/configs/processor.py deleted file mode 100644 index b9236fad5..000000000 --- a/unstructured/ingest/v2/cli/configs/processor.py +++ /dev/null @@ -1,88 +0,0 @@ -from dataclasses import dataclass - -import click - -from unstructured.ingest.v2.cli.interfaces import CliConfig -from unstructured.ingest.v2.interfaces.processor import DEFAULT_WORK_DIR - - -@dataclass -class ProcessorCliConfig(CliConfig): - @staticmethod - def get_cli_options() -> list[click.Option]: - options = [ - click.Option( - ["--reprocess"], - is_flag=True, - default=False, - help="Reprocess a downloaded file even if the relevant structured " - "output .json file in output directory already exists.", - ), - click.Option( - ["--work-dir"], - type=str, - default=DEFAULT_WORK_DIR, - show_default=True, - help="Where to place working files when processing each step", - ), - click.Option( - ["--num-processes"], - default=2, - show_default=True, - type=click.IntRange(min=1), - help="Number of parallel processes with which to process docs", - ), - click.Option( - ["--max-connections"], - default=None, - show_default=True, - type=click.IntRange(min=1), - help="Max number of connections allowed when running an async step", - ), - click.Option( - ["--raise-on-error"], - is_flag=True, - default=False, - help="Is set, will raise error if any doc in the pipeline fail. Otherwise will " - "log error and continue with other docs", - ), - click.Option( - ["--re-download"], - is_flag=True, - default=False, - help="Re-download files even if they are already present in download dir.", - ), - click.Option( - ["--preserve-downloads"], - is_flag=True, - default=False, - help="Preserve downloaded files. Otherwise each file is removed " - "after being processed successfully.", - ), - click.Option( - ["--download-only"], - is_flag=True, - default=False, - help="Download any files that are not already present in either --download-dir or " - "the default download ~/.cache/... location in case --download-dir " - "is not specified and " - "skip processing them through unstructured.", - ), - click.Option( - ["--max-docs"], - default=None, - type=int, - help="If specified, process at most the specified number of documents.", - ), - click.Option( - ["--uncompress"], - type=bool, - default=False, - is_flag=True, - help="Uncompress any archived files. 
Currently supporting zip and tar " - "files based on file extension.", - ), - click.Option(["--verbose"], is_flag=True, default=False), - click.Option(["--tqdm"], is_flag=True, default=False, help="Show progress bar"), - ] - return options diff --git a/unstructured/ingest/v2/cli/interfaces.py b/unstructured/ingest/v2/cli/interfaces.py deleted file mode 100644 index 2a8a0e18b..000000000 --- a/unstructured/ingest/v2/cli/interfaces.py +++ /dev/null @@ -1,27 +0,0 @@ -from abc import ABC, abstractmethod - -import click - - -class CliConfig(ABC): - @staticmethod - @abstractmethod - def get_cli_options() -> list[click.Option]: - pass - - @classmethod - def add_cli_options(cls, cmd: click.Command) -> None: - options_to_add = cls.get_cli_options() - CliConfig.add_params(cmd, params=options_to_add) - - @staticmethod - def add_params(cmd: click.Command, params: list[click.Parameter]): - existing_opts = [] - for param in cmd.params: - existing_opts.extend(param.opts) - for param in params: - for opt in param.opts: - if opt in existing_opts: - raise ValueError(f"{opt} is already defined on the command {cmd.name}") - existing_opts.append(opt) - cmd.params.append(param) diff --git a/unstructured/ingest/v2/cli/utils.py b/unstructured/ingest/v2/cli/utils.py deleted file mode 100644 index 66d414f61..000000000 --- a/unstructured/ingest/v2/cli/utils.py +++ /dev/null @@ -1,240 +0,0 @@ -import json -import os.path -import sys -from dataclasses import fields, is_dataclass -from gettext import gettext, ngettext -from gettext import gettext as _ -from pathlib import Path -from typing import Any, ForwardRef, Optional, Type, TypeVar, Union, get_args, get_origin - -import click - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.logger import logger - - -def conform_click_options(options: dict[str, Any]) -> None: - # Click sets all multiple fields as tuple, this needs to be updated to list - for k, v in options.items(): - if isinstance(v, tuple): - options[k] = list(v) - - -class Dict(click.ParamType): - name = "dict" - - def convert( - self, - value: Any, - param: Optional[click.Parameter] = None, - ctx: Optional[click.Context] = None, - ) -> Any: - try: - return json.loads(value) - except json.JSONDecodeError: - self.fail( - gettext( - "{value} is not a valid json value.", - ).format(value=value), - param, - ctx, - ) - - -class FileOrJson(click.ParamType): - name = "file-or-json" - - def __init__(self, allow_raw_str: bool = False): - self.allow_raw_str = allow_raw_str - - def convert( - self, - value: Any, - param: Optional[click.Parameter] = None, - ctx: Optional[click.Context] = None, - ) -> Any: - # check if valid file - full_path = os.path.abspath(os.path.expanduser(value)) - if os.path.isfile(full_path): - return str(Path(full_path).resolve()) - if isinstance(value, str): - try: - return json.loads(value) - except json.JSONDecodeError: - if self.allow_raw_str: - return value - self.fail( - gettext( - "{value} is not a valid json string nor an existing filepath.", - ).format(value=value), - param, - ctx, - ) - - -class DelimitedString(click.ParamType): - name = "delimited-string" - - def __init__(self, delimiter: str = ",", choices: Optional[list[str]] = None): - self.choices = choices if choices else [] - self.delimiter = delimiter - - def convert( - self, - value: Any, - param: Optional[click.Parameter] = None, - ctx: Optional[click.Context] = None, - ) -> Any: - # In case a list is provided as the default, will not break - if isinstance(value, list): 
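`interfaces.py` above is small but load-bearing: `add_params` refuses to attach an option whose flag already exists on the command, which is what keeps the merged connector, partition, chunk, and embed option sets collision-free. The guard, restated as a standalone function with a usage example:

```python
import click


def add_params(cmd: click.Command, params: list[click.Parameter]) -> None:
    """Mirror of the deleted CliConfig.add_params duplicate-flag guard."""
    existing_opts = [opt for param in cmd.params for opt in param.opts]
    for param in params:
        for opt in param.opts:
            if opt in existing_opts:
                raise ValueError(f"{opt} is already defined on the command {cmd.name}")
            existing_opts.append(opt)
        cmd.params.append(param)


cmd = click.Command("demo", params=[click.Option(["--batch-size"], type=int)])
add_params(cmd, [click.Option(["--num-threads"], type=int)])  # fine
# add_params(cmd, [click.Option(["--batch-size"], type=int)])  # raises ValueError
```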
- split = [str(v).strip() for v in value] - else: - split = [v.strip() for v in value.split(self.delimiter)] - if not self.choices: - return split - choices_str = ", ".join(map(repr, self.choices)) - for s in split: - if s not in self.choices: - self.fail( - ngettext( - "{value!r} is not {choice}.", - "{value!r} is not one of {choices}.", - len(self.choices), - ).format(value=s, choice=choices_str, choices=choices_str), - param, - ctx, - ) - return split - - -EnhancedDataClassJsonMixinT = TypeVar( - "EnhancedDataClassJsonMixinT", bound=EnhancedDataClassJsonMixin -) - - -def extract_config( - flat_data: dict, config: Type[EnhancedDataClassJsonMixinT] -) -> EnhancedDataClassJsonMixinT: - """ - To be able to extract a nested dataclass from a flat dictionary (as in one coming - from a click-based options input), the config class is dynamically looked through for - nested dataclass fields and new nested dictionaries are created to conform to the - shape the overall class expects when parsing from a dict. During the process, this will create - copies of the original dictionary to avoid pruning fields but this isn't a - problem since the `from_dict()` method ignores unneeded values. - - Not handling more complex edge cases for now such as nested types i.e Union[List[List[...]]] - """ - - def conform_dict(inner_d: dict, inner_config: Type[EnhancedDataClassJsonMixinT]): - # Catch edge cases (i.e. Dict[str, ...]) where underlying type is not a concrete Class, - # causing 'issubclass() arg 1 must be a class' errors, return False - def is_subclass(instance, class_type) -> bool: - try: - return issubclass(instance, class_type) - except Exception: - return False - - dd = inner_d.copy() - for field in fields(inner_config): - f_type = field.type - # typing can be defined using a string, in which case it needs to be resolved - # to the actual type. 
following logic is cherry picked from the typing - # get_type_hints() since type resolution can be expensive, only do it - # when the type is a string - if isinstance(f_type, str): - try: - base_globals = sys.modules[inner_config.__module__].__dict__ - for_ref = ForwardRef(f_type, is_argument=False, is_class=True) - f_type = for_ref._evaluate( - globalns=base_globals, localns=None, recursive_guard=frozenset() - ) - except NameError as e: - logger.warning(f"couldn't resolve type {f_type}: {e}") - # Handle the case where the type of a value if a Union (possibly optional) - if get_origin(f_type) is Union: - union_values = get_args(f_type) - # handle List types - union_values = [ - get_args(u)[0] if get_origin(u) is list else u for u in union_values - ] - # Ignore injected NoneType when optional - concrete_union_values = [v for v in union_values if not is_subclass(v, type(None))] - dataclass_union_values = [v for v in concrete_union_values if is_dataclass(v)] - non_dataclass_union_values = [ - v for v in concrete_union_values if not is_dataclass(v) - ] - if not dataclass_union_values: - continue - # Check if the key for this field already exists in the dictionary, - # if so it might map to one of these non dataclass fields and this - # can't be enforced - if non_dataclass_union_values and field.name in dd: - continue - if len(dataclass_union_values) > 1: - logger.warning( - "more than one dataclass type possible for field {}, " - "not extracting: {}".format(field.name, ", ".join(dataclass_union_values)) - ) - continue - f_type = dataclass_union_values[0] - origin = get_origin(f_type) - if origin: - f_type = origin - if is_subclass(f_type, EnhancedDataClassJsonMixin): - dd[field.name] = conform_dict(inner_d=dd, inner_config=f_type) - return dd - - adjusted_dict = conform_dict(inner_d=flat_data, inner_config=config) - return config.from_dict(adjusted_dict, apply_name_overload=False) - - -class Group(click.Group): - def parse_args(self, ctx, args): - """ - This allows for subcommands to be called with the --help flag without breaking - if parent command is missing any of its required parameters - """ - - try: - return super().parse_args(ctx, args) - except click.MissingParameter: - if "--help" not in args: - raise - - # remove the required params so that help can display - for param in self.params: - param.required = False - return super().parse_args(ctx, args) - - def format_commands(self, ctx: click.Context, formatter: click.HelpFormatter) -> None: - """ - Copy of the original click.Group format_commands() method but replacing - 'Commands' -> 'Destinations' - """ - commands = [] - for subcommand in self.list_commands(ctx): - cmd = self.get_command(ctx, subcommand) - # What is this, the tool lied about a command. 
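The `extract_config` helper above is the bridge between click's flat option namespace and the nested config dataclasses. A much-simplified sketch of the same idea, leaving out the deleted version's ForwardRef resolution and Union-of-dataclass narrowing (the `Access`/`Connection` classes here are illustrative, not the real configs):

import dataclasses
from typing import Optional, Type, TypeVar, Union, get_args, get_origin

T = TypeVar("T")


def extract_flat(flat: dict, config: Type[T]) -> T:
    """Build `config` from a flat dict, recursing into nested dataclass fields."""
    kwargs = {}
    for f in dataclasses.fields(config):
        f_type = f.type
        # Unwrap Optional[X] when X is the only concrete member of the Union.
        if get_origin(f_type) is Union:
            concrete = [a for a in get_args(f_type) if a is not type(None)]
            if len(concrete) == 1:
                f_type = concrete[0]
        if dataclasses.is_dataclass(f_type):
            # Same flat dict on recursion, like conform_dict() above.
            kwargs[f.name] = extract_flat(flat, f_type)
        elif f.name in flat:
            kwargs[f.name] = flat[f.name]
    return config(**kwargs)


@dataclasses.dataclass
class Access:
    api_key: Optional[str] = None


@dataclasses.dataclass
class Connection:
    host: str = "localhost"
    access: Access = dataclasses.field(default_factory=Access)


print(extract_flat({"host": "db.internal", "api_key": "abc"}, Connection))
# -> Connection(host='db.internal', access=Access(api_key='abc'))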
Ignore it - if cmd is None: - continue - if cmd.hidden: - continue - - commands.append((subcommand, cmd)) - - # allow for 3 times the default spacing - if len(commands): - if formatter.width: - limit = formatter.width - 6 - max(len(cmd[0]) for cmd in commands) - else: - limit = -6 - max(len(cmd[0]) for cmd in commands) - - rows = [] - for subcommand, cmd in commands: - help = cmd.get_short_help_str(limit) - rows.append((subcommand, help)) - - if rows: - with formatter.section(_("Destinations")): - formatter.write_dl(rows) diff --git a/unstructured/ingest/v2/example.py b/unstructured/ingest/v2/example.py deleted file mode 100644 index c4545f926..000000000 --- a/unstructured/ingest/v2/example.py +++ /dev/null @@ -1,37 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( - S3ConnectionConfig, - S3DownloaderConfig, - S3IndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig( - work_dir=str(work_dir.resolve()), tqdm=True, reprocess=True, verbose=True - ), - indexer_config=S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/"), - downloader_config=S3DownloaderConfig(download_dir=download_path), - source_connection_config=S3ConnectionConfig(anonymous=True), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_azure_cognitive_search.py b/unstructured/ingest/v2/examples/example_azure_cognitive_search.py deleted file mode 100644 index f3679ad1b..000000000 --- a/unstructured/ingest/v2/examples/example_azure_cognitive_search.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.azure_cognitive_search import ( - AzureCognitiveSearchAccessConfig, - AzureCognitiveSearchConnectionConfig, - AzureCognitiveSearchUploaderConfig, - AzureCognitiveSearchUploadStagerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = 
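The `parse_args` override in the `Group` subclass above is a small but handy click pattern: it lets `--help` on a subcommand render even when the parent group's required options are missing. Wired up standalone (assuming click; the option and command names are made up):

import click


class ForgivingGroup(click.Group):
    """Let --help render even when the group's required options are absent."""

    def parse_args(self, ctx, args):
        try:
            return super().parse_args(ctx, args)
        except click.MissingParameter:
            if "--help" not in args:
                raise
            for param in self.params:
                param.required = False  # relax so help can display
            return super().parse_args(ctx, args)


@click.group(cls=ForgivingGroup)
@click.option("--work-dir", required=True)
def ingest(work_dir: str) -> None:
    pass


@ingest.command()
def s3() -> None:
    click.echo("s3 source selected")


if __name__ == "__main__":
    ingest()  # `ingest s3 --help` now works without --work-dir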
base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - index_name = "ingest-test-destination" - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", chunk_include_orig_elements=False - ), - embedder_config=EmbedderConfig( - embedding_provider="langchain-openai", embedding_api_key=os.getenv("OPENAI_API_KEY") - ), - destination_connection_config=AzureCognitiveSearchConnectionConfig( - access_config=AzureCognitiveSearchAccessConfig(key=os.getenv("AZURE_SEARCH_API_KEY")), - index=os.getenv("AZURE_SEARCH_INDEX"), - endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"), - ), - uploader_config=AzureCognitiveSearchUploaderConfig(batch_size=10), - stager_config=AzureCognitiveSearchUploadStagerConfig(), - ).run() diff --git a/unstructured/ingest/v2/examples/example_chroma.py b/unstructured/ingest/v2/examples/example_chroma.py deleted file mode 100644 index f5773c4d8..000000000 --- a/unstructured/ingest/v2/examples/example_chroma.py +++ /dev/null @@ -1,53 +0,0 @@ -import random -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.chroma import ( - ChromaAccessConfig, - ChromaConnectionConfig, - ChromaUploaderConfig, - ChromaUploadStagerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=ChromaConnectionConfig( - access_config=ChromaAccessConfig(settings=None, headers=None), - host="localhost", - port=8047, - collection_name=f"test-collection-{random.randint(1000,9999)}", - tenant="default_tenant", - database="default_database", - ), - stager_config=ChromaUploadStagerConfig(), - uploader_config=ChromaUploaderConfig(batch_size=10), - ).run() diff --git 
a/unstructured/ingest/v2/examples/example_databricks_volumes.py b/unstructured/ingest/v2/examples/example_databricks_volumes.py deleted file mode 100644 index ecc8b6301..000000000 --- a/unstructured/ingest/v2/examples/example_databricks_volumes.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.databricks_volumes import ( - DatabricksVolumesAccessConfig, - DatabricksVolumesConnectionConfig, - DatabricksVolumesUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=DatabricksVolumesConnectionConfig( - access_config=DatabricksVolumesAccessConfig( - username=os.environ["DATABRICKS_USERNAME"], - password=os.environ["DATABRICKS_PASSWORD"], - ), - host=os.environ["DATABRICKS_HOST"], - ), - uploader_config=DatabricksVolumesUploaderConfig( - catalog=os.environ["DATABRICKS_CATALOG"], - volume=os.environ["DATABRICKS_VOLUME"], - volume_path=os.environ["DATABRICKS_VOLUME_PATH"], - ), - ).run() diff --git a/unstructured/ingest/v2/examples/example_elasticsearch.py b/unstructured/ingest/v2/examples/example_elasticsearch.py deleted file mode 100644 index 96cdeef24..000000000 --- a/unstructured/ingest/v2/examples/example_elasticsearch.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.elasticsearch import ( - ElasticsearchAccessConfig, - ElasticsearchConnectionConfig, - ElasticsearchUploaderConfig, - ElasticsearchUploadStagerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / 
"example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - index_name = "ingest-test-destination" - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=ElasticsearchConnectionConfig( - access_config=ElasticsearchAccessConfig(password=os.getenv("ELASTIC_PASSWORD")), - username=os.getenv("ELASTIC_USERNAME"), - hosts=["http://localhost:9200"], - ), - uploader_config=ElasticsearchUploaderConfig(index_name=index_name), - stager_config=ElasticsearchUploadStagerConfig(index_name=index_name), - ).run() diff --git a/unstructured/ingest/v2/examples/example_local.py b/unstructured/ingest/v2/examples/example_local.py deleted file mode 100644 index f72334e40..000000000 --- a/unstructured/ingest/v2/examples/example_local.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_mongodb.py b/unstructured/ingest/v2/examples/example_mongodb.py deleted file mode 100644 index 4ef562ae6..000000000 --- a/unstructured/ingest/v2/examples/example_mongodb.py +++ /dev/null @@ -1,52 +0,0 @@ -import random -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - 
LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.mongodb import ( - MongoDBAccessConfig, - MongoDBConnectionConfig, - MongoDBUploaderConfig, - MongoDBUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=MongoDBConnectionConfig( - access_config=MongoDBAccessConfig(uri=None), - host="localhost", - port=27017, - collection=f"test-collection-{random.randint(1000,9999)}", - database="testDatabase", - ), - stager_config=MongoDBUploadStagerConfig(), - uploader_config=MongoDBUploaderConfig(batch_size=10), - ).run() diff --git a/unstructured/ingest/v2/examples/example_opensearch.py b/unstructured/ingest/v2/examples/example_opensearch.py deleted file mode 100644 index a5f654cfe..000000000 --- a/unstructured/ingest/v2/examples/example_opensearch.py +++ /dev/null @@ -1,51 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.opensearch import ( - OpenSearchAccessConfig, - OpenSearchConnectionConfig, - OpenSearchUploaderConfig, - OpenSearchUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=OpenSearchConnectionConfig( - hosts="http://localhost:9247", 
- username="admin", - access_config=OpenSearchAccessConfig( - password="admin", - use_ssl=True, - ), - ), - stager_config=OpenSearchUploadStagerConfig(index_name="ingest-test-destination"), - uploader_config=OpenSearchUploaderConfig( - index_name="ingest-test-destination", batch_size_bytes=150 - ), - ).run() diff --git a/unstructured/ingest/v2/examples/example_pinecone.py b/unstructured/ingest/v2/examples/example_pinecone.py deleted file mode 100644 index 236a64df2..000000000 --- a/unstructured/ingest/v2/examples/example_pinecone.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.pinecone import ( - PineconeAccessConfig, - PineconeConnectionConfig, - PineconeUploaderConfig, - PineconeUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=PineconeConnectionConfig( - # You'll need to set PINECONE_API_KEY environment variable to run this example - access_config=PineconeAccessConfig(api_key=os.getenv("PINECONE_API_KEY")), - index_name=os.getenv( - "PINECONE_INDEX", - default="your index name here. e.g. my-index," - "or define in environment variable PINECONE_INDEX", - ), - environment=os.getenv( - "PINECONE_ENVIRONMENT", - default="your environment name here. e.g. 
us-east-1," - "or define in environment variable PINECONE_ENVIRONMENT", - ), - ), - stager_config=PineconeUploadStagerConfig(), - uploader_config=PineconeUploaderConfig(batch_size=10, num_of_processes=2), - ).run() diff --git a/unstructured/ingest/v2/examples/example_s3.py b/unstructured/ingest/v2/examples/example_s3.py deleted file mode 100644 index 2910f526d..000000000 --- a/unstructured/ingest/v2/examples/example_s3.py +++ /dev/null @@ -1,35 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.fsspec.s3 import ( - S3ConnectionConfig, - S3DownloaderConfig, - S3IndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/"), - downloader_config=S3DownloaderConfig(download_dir=download_path), - source_connection_config=S3ConnectionConfig(anonymous=True), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_salesforce.py b/unstructured/ingest/v2/examples/example_salesforce.py deleted file mode 100644 index b3439d5aa..000000000 --- a/unstructured/ingest/v2/examples/example_salesforce.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.salesforce import ( - SalesforceAccessConfig, - SalesforceConnectionConfig, - SalesforceDownloaderConfig, - SalesforceIndexerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=SalesforceIndexerConfig(categories=["Campaign", "EmailMessage"]), - downloader_config=SalesforceDownloaderConfig(download_dir=download_path), - 
source_connection_config=SalesforceConnectionConfig( - SalesforceAccessConfig( - consumer_key=os.getenv("SALESFORCE_CONSUMER_KEY"), - private_key=os.getenv("SALESFORCE_PRIVATE_KEY"), - ), - username=os.getenv("SALESFORCE_USERNAME"), - ), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_sharepoint.py b/unstructured/ingest/v2/examples/example_sharepoint.py deleted file mode 100644 index bc9139efc..000000000 --- a/unstructured/ingest/v2/examples/example_sharepoint.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.connectors.local import ( - LocalUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.sharepoint import ( - SharepointAccessConfig, - SharepointConnectionConfig, - SharepointDownloaderConfig, - SharepointIndexerConfig, - SharepointPermissionsConfig, -) -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve()), tqdm=True, verbose=True), - indexer_config=SharepointIndexerConfig(), - downloader_config=SharepointDownloaderConfig(download_dir=download_path), - source_connection_config=SharepointConnectionConfig( - client_id=os.getenv("SHAREPOINT_CLIENT_ID"), - site=os.getenv("SHAREPOINT_SITE"), - access_config=SharepointAccessConfig(client_cred=os.getenv("SHAREPOINT_CRED")), - permissions_config=SharepointPermissionsConfig( - permissions_application_id=os.getenv("SHAREPOINT_PERMISSIONS_APP_ID"), - permissions_client_cred=os.getenv("SHAREPOINT_PERMISSIONS_APP_CRED"), - permissions_tenant=os.getenv("SHAREPOINT_PERMISSIONS_TENANT"), - ), - ), - partitioner_config=PartitionerConfig(strategy="fast"), - # chunker_config=ChunkerConfig(chunking_strategy="by_title"), - # embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - uploader_config=LocalUploaderConfig(output_dir=str(output_path.resolve())), - ).run() diff --git a/unstructured/ingest/v2/examples/example_singlestore.py b/unstructured/ingest/v2/examples/example_singlestore.py deleted file mode 100644 index 47d4494a9..000000000 --- a/unstructured/ingest/v2/examples/example_singlestore.py +++ /dev/null @@ -1,48 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.singlestore import ( - SingleStoreAccessConfig, - SingleStoreConnectionConfig, - SingleStoreUploaderConfig, - 
SingleStoreUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve()), tqdm=True, verbose=True), - indexer_config=LocalIndexerConfig( - input_path=str(docs_path.resolve()) + "/book-war-and-peace-1p.txt" - ), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=SingleStoreConnectionConfig( - access_config=SingleStoreAccessConfig(password="password"), - host="localhost", - port=3306, - database="ingest_test", - user="root", - ), - stager_config=SingleStoreUploadStagerConfig(), - uploader_config=SingleStoreUploaderConfig(table_name="elements"), - ).run() diff --git a/unstructured/ingest/v2/examples/example_sql.py b/unstructured/ingest/v2/examples/example_sql.py deleted file mode 100644 index 4ed938192..000000000 --- a/unstructured/ingest/v2/examples/example_sql.py +++ /dev/null @@ -1,88 +0,0 @@ -import os -import sqlite3 -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.sql import ( - DatabaseType, - SimpleSqlConfig, - SQLAccessConfig, - SQLUploaderConfig, - SQLUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -SQLITE_DB = "test-sql-db.sqlite" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - - configs = { - "context": ProcessorConfig(work_dir=str(work_dir.resolve())), - "indexer_config": LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - "downloader_config": LocalDownloaderConfig(download_dir=download_path), - "source_connection_config": LocalConnectionConfig(), - "partitioner_config": PartitionerConfig(strategy="fast"), - "chunker_config": ChunkerConfig( - chunking_strategy="by_title", - chunk_include_orig_elements=False, - chunk_max_characters=1500, - chunk_multipage_sections=True, - ), - "embedder_config": EmbedderConfig(embedding_provider="langchain-huggingface"), - "stager_config": SQLUploadStagerConfig(), - "uploader_config": SQLUploaderConfig(batch_size=10), - } - - if os.path.exists(SQLITE_DB): - os.remove(SQLITE_DB) - - connection = sqlite3.connect(database=SQLITE_DB) - - query = None - 
script_path = ( - Path(__file__).parent.parent.parent.parent.parent - / Path("scripts/sql-test-helpers/create-sqlite-schema.sql") - ).resolve() - with open(script_path) as f: - query = f.read() - cursor = connection.cursor() - cursor.executescript(query) - connection.close() - - # sqlite test first - Pipeline.from_configs( - destination_connection_config=SimpleSqlConfig( - db_type=DatabaseType.SQLITE, - database=SQLITE_DB, - access_config=SQLAccessConfig(), - ), - **configs, - ).run() - - # now, pg with pgvector - Pipeline.from_configs( - destination_connection_config=SimpleSqlConfig( - db_type=DatabaseType.POSTGRESQL, - database="elements", - host="localhost", - port=5433, - access_config=SQLAccessConfig(username="unstructured", password="test"), - ), - **configs, - ).run() diff --git a/unstructured/ingest/v2/examples/example_weaviate.py b/unstructured/ingest/v2/examples/example_weaviate.py deleted file mode 100644 index 5b9e739c5..000000000 --- a/unstructured/ingest/v2/examples/example_weaviate.py +++ /dev/null @@ -1,44 +0,0 @@ -from pathlib import Path - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.pipeline import Pipeline -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connectors.local import ( - LocalConnectionConfig, - LocalDownloaderConfig, - LocalIndexerConfig, -) -from unstructured.ingest.v2.processes.connectors.weaviate import ( - WeaviateConnectionConfig, - WeaviateUploaderConfig, - WeaviateUploadStagerConfig, -) -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - -base_path = Path(__file__).parent.parent.parent.parent.parent -docs_path = base_path / "example-docs" -work_dir = base_path / "tmp_ingest" -output_path = work_dir / "output" -download_path = work_dir / "download" - -if __name__ == "__main__": - logger.info(f"Writing all content in: {work_dir.resolve()}") - Pipeline.from_configs( - context=ProcessorConfig(work_dir=str(work_dir.resolve())), - indexer_config=LocalIndexerConfig(input_path=str(docs_path.resolve()) + "/multisimple/"), - downloader_config=LocalDownloaderConfig(download_dir=download_path), - source_connection_config=LocalConnectionConfig(), - partitioner_config=PartitionerConfig(strategy="fast"), - chunker_config=ChunkerConfig(chunking_strategy="by_title"), - embedder_config=EmbedderConfig(embedding_provider="langchain-huggingface"), - destination_connection_config=WeaviateConnectionConfig( - host_url="http://localhost:8080", - class_name="elements", - access_config=None, - anonymous=True, - ), - stager_config=WeaviateUploadStagerConfig(), - uploader_config=WeaviateUploaderConfig(batch_size=10), - ).run() diff --git a/unstructured/ingest/v2/interfaces/__init__.py b/unstructured/ingest/v2/interfaces/__init__.py deleted file mode 100644 index 5aa6240ab..000000000 --- a/unstructured/ingest/v2/interfaces/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from .connector import AccessConfig, BaseConnector, ConnectionConfig -from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses -from .file_data import FileData, SourceIdentifiers -from .indexer import Indexer, IndexerConfig -from .process import BaseProcess -from .processor import ProcessorConfig -from .upload_stager import UploadStager, UploadStagerConfig -from .uploader import UploadContent, Uploader, UploaderConfig - -__all__ = 
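The SQL example above prepares the SQLite side by executing a schema script, then runs the same pipeline twice, once against SQLite and once against Postgres/pgvector, reusing one shared `configs` dict. The schema-setup step in isolation (paths are illustrative; the call is commented out since it needs the schema file to exist):

import sqlite3
from pathlib import Path


def init_sqlite(db_path: str, schema_path: str) -> None:
    """Recreate the database from a .sql script, as the example does."""
    Path(db_path).unlink(missing_ok=True)  # start from a clean file
    connection = sqlite3.connect(db_path)
    try:
        connection.executescript(Path(schema_path).read_text())
        connection.commit()
    finally:
        connection.close()


# init_sqlite("test-sql-db.sqlite", "create-sqlite-schema.sql")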
[ - "DownloadResponse", - "download_responses", - "Downloader", - "DownloaderConfig", - "FileData", - "Indexer", - "IndexerConfig", - "BaseProcess", - "ProcessorConfig", - "UploadStager", - "UploadStagerConfig", - "Uploader", - "UploaderConfig", - "SourceIdentifiers", - "UploadContent", - "AccessConfig", - "ConnectionConfig", - "BaseConnector", -] diff --git a/unstructured/ingest/v2/interfaces/connector.py b/unstructured/ingest/v2/interfaces/connector.py deleted file mode 100644 index dc700fc94..000000000 --- a/unstructured/ingest/v2/interfaces/connector.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import ABC -from dataclasses import dataclass -from typing import Any, TypeVar - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin - - -@dataclass -class AccessConfig(EnhancedDataClassJsonMixin): - """Meant to designate holding any sensitive information associated with other configs - and also for access specific configs.""" - - -AccessConfigT = TypeVar("AccessConfigT", bound=AccessConfig) - - -@dataclass -class ConnectionConfig(EnhancedDataClassJsonMixin): - access_config: AccessConfigT - - def get_access_config(self) -> dict[str, Any]: - if not self.access_config: - return {} - return self.access_config.to_dict(apply_name_overload=False) - - -ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig) - - -@dataclass -class BaseConnector(ABC): - connection_config: ConnectionConfigT diff --git a/unstructured/ingest/v2/interfaces/downloader.py b/unstructured/ingest/v2/interfaces/downloader.py deleted file mode 100644 index 3a493b017..000000000 --- a/unstructured/ingest/v2/interfaces/downloader.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Optional, TypedDict, TypeVar, Union - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.interfaces.connector import BaseConnector -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class DownloaderConfig(EnhancedDataClassJsonMixin): - download_dir: Optional[Path] = None - - -DownloaderConfigT = TypeVar("DownloaderConfigT", bound=DownloaderConfig) - - -class DownloadResponse(TypedDict): - file_data: FileData - path: Path - - -download_responses = Union[list[DownloadResponse], DownloadResponse] - - -class Downloader(BaseProcess, BaseConnector, ABC): - connector_type: str - download_config: DownloaderConfigT - - @staticmethod - def is_float(value: str): - try: - float(value) - return True - except ValueError: - return False - - def generate_download_response( - self, file_data: FileData, download_path: Path - ) -> DownloadResponse: - if ( - file_data.metadata.date_modified - and self.is_float(file_data.metadata.date_modified) - and file_data.metadata.date_created - and self.is_float(file_data.metadata.date_created) - ): - date_modified = float(file_data.metadata.date_modified) - date_created = float(file_data.metadata.date_created) - os.utime(download_path, times=(date_created, date_modified)) - return DownloadResponse(file_data=file_data, path=download_path) - - @property - def download_dir(self) -> Path: - if self.download_config.download_dir is None: - self.download_config.download_dir = ( - Path.home() - / ".cache" - / "unstructured" - / "ingest" - / "download" - / self.connector_type - ).resolve() - return self.download_config.download_dir - - 
def is_async(self) -> bool: - return True - - def get_download_path(self, file_data: FileData) -> Optional[Path]: - return None - - @abstractmethod - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - pass - - async def run_async(self, file_data: FileData, **kwargs: Any) -> download_responses: - return self.run(file_data=file_data, **kwargs) diff --git a/unstructured/ingest/v2/interfaces/file_data.py b/unstructured/ingest/v2/interfaces/file_data.py deleted file mode 100644 index 9cccbaff0..000000000 --- a/unstructured/ingest/v2/interfaces/file_data.py +++ /dev/null @@ -1,56 +0,0 @@ -import json -from dataclasses import dataclass, field -from enum import Enum -from pathlib import Path -from typing import Any, Optional - -from dataclasses_json import DataClassJsonMixin - -from unstructured.documents.elements import DataSourceMetadata - - -class IndexDocType(str, Enum): - BATCH = "batch" - FILE = "file" - - -@dataclass -class SourceIdentifiers: - filename: str - fullpath: str - rel_path: Optional[str] = None - - @property - def filename_stem(self) -> str: - return Path(self.filename).stem - - @property - def relative_path(self) -> str: - return self.rel_path or self.fullpath - - -@dataclass -class FileData(DataClassJsonMixin): - identifier: str - connector_type: str - source_identifiers: Optional[SourceIdentifiers] = None - doc_type: IndexDocType = field(default=IndexDocType.FILE) - metadata: DataSourceMetadata = field(default_factory=DataSourceMetadata) - additional_metadata: dict[str, Any] = field(default_factory=dict) - reprocess: bool = False - - @classmethod - def from_file(cls, path: str) -> "FileData": - path = Path(path).resolve() - if not path.exists() or not path.is_file(): - raise ValueError(f"file path not valid: {path}") - with open(str(path.resolve()), "rb") as f: - file_data_dict = json.load(f) - file_data = FileData.from_dict(file_data_dict) - return file_data - - def to_file(self, path: str) -> None: - path = Path(path).resolve() - path.parent.mkdir(parents=True, exist_ok=True) - with open(str(path.resolve()), "w") as f: - json.dump(self.to_dict(), f, indent=2) diff --git a/unstructured/ingest/v2/interfaces/indexer.py b/unstructured/ingest/v2/interfaces/indexer.py deleted file mode 100644 index f3f2490ef..000000000 --- a/unstructured/ingest/v2/interfaces/indexer.py +++ /dev/null @@ -1,28 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any, Generator, Optional, TypeVar - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.interfaces.connector import BaseConnector -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class IndexerConfig(EnhancedDataClassJsonMixin): - pass - - -IndexerConfigT = TypeVar("IndexerConfigT", bound=IndexerConfig) - - -class Indexer(BaseProcess, BaseConnector, ABC): - connector_type: str - index_config: Optional[IndexerConfigT] = None - - def is_async(self) -> bool: - return False - - @abstractmethod - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - pass diff --git a/unstructured/ingest/v2/interfaces/process.py b/unstructured/ingest/v2/interfaces/process.py deleted file mode 100644 index 028356111..000000000 --- a/unstructured/ingest/v2/interfaces/process.py +++ /dev/null @@ -1,20 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Any - - -@dataclass -class 
BaseProcess(ABC): - def is_async(self) -> bool: - return False - - @abstractmethod - def run(self, **kwargs: Any) -> Any: - pass - - async def run_async(self, **kwargs: Any) -> Any: - return self.run(**kwargs) - - def check_connection(self): - # If the process requires external connections, run a quick check - pass diff --git a/unstructured/ingest/v2/interfaces/processor.py b/unstructured/ingest/v2/interfaces/processor.py deleted file mode 100644 index 96390e53f..000000000 --- a/unstructured/ingest/v2/interfaces/processor.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from asyncio import Semaphore -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Optional - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin - -DEFAULT_WORK_DIR = str((Path.home() / ".cache" / "unstructured" / "ingest" / "pipeline").resolve()) - - -@dataclass -class ProcessorConfig(EnhancedDataClassJsonMixin): - reprocess: bool = False - verbose: bool = False - tqdm: bool = False - work_dir: str = field(default_factory=lambda: DEFAULT_WORK_DIR) - num_processes: int = 2 - max_connections: Optional[int] = None - raise_on_error: bool = False - disable_parallelism: bool = field( - default_factory=lambda: os.getenv("INGEST_DISABLE_PARALLELISM", "false").lower() == "true" - ) - preserve_downloads: bool = False - download_only: bool = False - max_docs: Optional[int] = None - re_download: bool = False - uncompress: bool = False - - # Used to keep track of state in pipeline - status: dict[str, Any] = field(default_factory=dict) - semaphore: Optional[Semaphore] = field(init=False, default=None) - - def __post_init__(self): - if self.max_connections is not None: - self.semaphore = Semaphore(self.max_connections) - - @property - def mp_supported(self) -> bool: - return not self.disable_parallelism and self.num_processes > 1 - - @property - def async_supported(self) -> bool: - if self.disable_parallelism: - return False - if self.max_connections is not None and isinstance(self.max_connections, int): - return self.max_connections > 1 - return True diff --git a/unstructured/ingest/v2/interfaces/upload_stager.py b/unstructured/ingest/v2/interfaces/upload_stager.py deleted file mode 100644 index 2aeef2e5d..000000000 --- a/unstructured/ingest/v2/interfaces/upload_stager.py +++ /dev/null @@ -1,48 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from typing import Any, TypeVar - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class UploadStagerConfig(EnhancedDataClassJsonMixin): - pass - - -UploadStagerConfigT = TypeVar("UploadStagerConfigT", bound=UploadStagerConfig) - - -@dataclass -class UploadStager(BaseProcess, ABC): - upload_stager_config: UploadStagerConfigT - - @abstractmethod - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any - ) -> Path: - pass - - async def run_async( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any - ) -> Path: - return self.run( - elements_filepath=elements_filepath, - output_dir=output_dir, - output_filename=output_filename, - file_data=file_data, - **kwargs - ) diff --git a/unstructured/ingest/v2/interfaces/uploader.py b/unstructured/ingest/v2/interfaces/uploader.py 
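`ProcessorConfig` above turns `max_connections` into an `asyncio.Semaphore` in `__post_init__`, which is how per-step concurrency gets capped when steps run async. The mechanism in miniature (a sketch, not the pipeline's actual wiring):

import asyncio


async def fetch(i: int, sem: asyncio.Semaphore) -> int:
    async with sem:  # at most N tasks are inside this block at once
        await asyncio.sleep(0.01)
        return i


async def main() -> None:
    sem = asyncio.Semaphore(2)  # plays the role of max_connections=2
    print(await asyncio.gather(*(fetch(i, sem) for i in range(6))))


asyncio.run(main())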
deleted file mode 100644 index b8c282983..000000000 --- a/unstructured/ingest/v2/interfaces/uploader.py +++ /dev/null @@ -1,39 +0,0 @@ -from abc import ABC, abstractmethod -from dataclasses import dataclass -from pathlib import Path -from typing import Any, TypeVar - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.v2.interfaces.connector import BaseConnector -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class UploaderConfig(EnhancedDataClassJsonMixin): - pass - - -UploaderConfigT = TypeVar("UploaderConfigT", bound=UploaderConfig) - - -@dataclass -class UploadContent: - path: Path - file_data: FileData - - -@dataclass -class Uploader(BaseProcess, BaseConnector, ABC): - upload_config: UploaderConfigT - connector_type: str - - def is_async(self) -> bool: - return False - - @abstractmethod - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - pass - - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return self.run(contents=[UploadContent(path=path, file_data=file_data)], **kwargs) diff --git a/unstructured/ingest/v2/logger.py b/unstructured/ingest/v2/logger.py deleted file mode 100644 index 34c5c1df3..000000000 --- a/unstructured/ingest/v2/logger.py +++ /dev/null @@ -1,123 +0,0 @@ -import ast -import json -import os -from logging import Formatter, Logger, StreamHandler, getLevelName, getLogger -from typing import Any, Callable - -log_level = os.getenv("INGEST_LOG_LEVEL", "INFO") -LOGGER_NAME = "unstructured.ingest.v2" - - -def default_is_data_sensitive(k: str, v: Any) -> bool: - sensitive_fields = [ - "account_name", - "client_id", - ] - sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"] - return ( - v - and any([s in k.lower() for s in sensitive_triggers]) # noqa: C419 - or k.lower() in sensitive_fields - ) - - -def hide_sensitive_fields( - data: dict, is_sensitive_fn: Callable[[str, Any], bool] = default_is_data_sensitive -) -> dict: - """ - Will recursively look through every k, v pair in this dict and any nested ones and run - is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if - any string value can be parsed as valid json and process that dict as well and replace - the original string with the json.dumps() version of the redacted dict. - """ - new_data = data.copy() - for k, v in new_data.items(): - if is_sensitive_fn(k, v): - new_data[k] = "*******" - if isinstance(v, dict): - new_data[k] = hide_sensitive_fields(v) - if isinstance(v, str): - # Need to take into account strings generated via json.dumps() or simply printing a dict - try: - json_data = json.loads(v) - if isinstance(json_data, dict): - updated_data = hide_sensitive_fields(json_data) - new_data[k] = json.dumps(updated_data) - except json.JSONDecodeError: - pass - - return new_data - - -def redact_jsons(s: str) -> str: - """ - Takes in a generic string and pulls out all valid json content. Leverages - hide_sensitive_fields() to redact any sensitive information and replaces the - original json with the new redacted format. There can be any number of valid - jsons in a generic string and this will work. Having extra '{' without a - closing '}' will cause this to break though. i.e '{ text, {"a": 3}'. 
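`hide_sensitive_fields` above recurses through nested dicts (and through dict-shaped JSON strings) redacting any value whose key looks credential-like. Stripped to its core, leaving aside the JSON-string handling and the exact field list, the recursion is:

SENSITIVE_TRIGGERS = ("key", "cred", "token", "password", "oauth", "secret")


def hide(data: dict) -> dict:
    """Return a copy with credential-looking values replaced by '*******'."""
    out = {}
    for k, v in data.items():
        if isinstance(v, dict):
            out[k] = hide(v)
        elif v and any(t in k.lower() for t in SENSITIVE_TRIGGERS):
            out[k] = "*******"
        else:
            out[k] = v
    return out


print(hide({"host": "db", "access": {"api_key": "abc", "timeout": 5}}))
# -> {'host': 'db', 'access': {'api_key': '*******', 'timeout': 5}}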
- - """ - chars = list(s) - if "{" not in chars: - return s - i = 0 - jsons = [] - i = 0 - while i < len(chars): - char = chars[i] - if char == "{": - stack = [char] - current = [char] - while len(stack) != 0 and i < len(chars): - i += 1 - char = chars[i] - current.append(char) - if char == "{": - stack.append(char) - if char == "}": - stack.pop(-1) - jsons.append("".join(current)) - continue - i += 1 - for j in jsons: - try: - formatted_j = json.dumps(json.loads(j)) - except json.JSONDecodeError: - lit = ast.literal_eval(j) - formatted_j = json.dumps(lit) - hidden_j = json.dumps(hide_sensitive_fields(json.loads(formatted_j))) - s = s.replace(j, hidden_j) - return s - - -class SensitiveFormatter(Formatter): - def format(self, record): - s = super().format(record=record) - return redact_jsons(s) - - -def remove_root_handlers(logger: Logger) -> None: - # NOTE(robinson) - in some environments such as Google Colab, there is a root handler - # that doesn't not mask secrets, meaning sensitive info such as api keys appear in logs. - # Removing these when they exist prevents this behavior - if logger.root.hasHandlers(): - for handler in logger.root.handlers: - logger.root.removeHandler(handler) - - -def make_default_logger(level: int) -> Logger: - """Return a custom logger.""" - logger = getLogger(LOGGER_NAME) - handler = StreamHandler() - handler.name = "ingest_log_handler" - formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s") - handler.setFormatter(formatter) - if handler.name not in [h.name for h in logger.handlers]: - logger.addHandler(handler) - logger.setLevel(level) - remove_root_handlers(logger) - return logger - - -logger = make_default_logger(level=getLevelName(log_level.upper())) diff --git a/unstructured/ingest/v2/main.py b/unstructured/ingest/v2/main.py deleted file mode 100644 index f1b697717..000000000 --- a/unstructured/ingest/v2/main.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python3 -from unstructured.ingest.v2.cli.cli import get_cmd - - -def main(): - ingest_cmd = get_cmd() - ingest_cmd() - - -if __name__ == "__main__": - main() diff --git a/unstructured/ingest/v2/pipeline/__init__.py b/unstructured/ingest/v2/pipeline/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/pipeline/interfaces.py b/unstructured/ingest/v2/pipeline/interfaces.py deleted file mode 100644 index ae6dd95d7..000000000 --- a/unstructured/ingest/v2/pipeline/interfaces.py +++ /dev/null @@ -1,169 +0,0 @@ -from __future__ import annotations - -import asyncio -import logging -import multiprocessing as mp -from abc import ABC -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass -from functools import wraps -from pathlib import Path -from time import time -from typing import Any, Awaitable, Callable, Optional, TypeVar - -from tqdm import tqdm -from tqdm.asyncio import tqdm as tqdm_asyncio - -from unstructured.ingest.v2.interfaces import BaseProcess, ProcessorConfig -from unstructured.ingest.v2.logger import logger, make_default_logger - -BaseProcessT = TypeVar("BaseProcessT", bound=BaseProcess) -iterable_input = list[dict[str, Any]] - - -def timed(func): - @wraps(func) - def time_it(self, *args, **kwargs): - start = time() - try: - return func(self, *args, **kwargs) - finally: - if func.__name__ == "__call__": - reported_name = f"{self.__class__.__name__} [cls]" - else: - reported_name = func.__name__ - logger.info(f"{reported_name} took {time() - start} seconds") - - return time_it - - 
-@dataclass -class PipelineStep(ABC): - process: BaseProcessT - context: ProcessorConfig - identifier: str - - def __str__(self): - return self.identifier - - def process_serially(self, iterable: iterable_input) -> Any: - logger.info("processing content serially") - if iterable: - if len(iterable) == 1: - return [self.run(**iterable[0])] - if self.context.tqdm: - return [self.run(**it) for it in tqdm(iterable, desc=self.identifier)] - return [self.run(**it) for it in iterable] - return [self.run()] - - async def _process_async(self, iterable: iterable_input) -> Any: - if iterable: - if len(iterable) == 1: - return [await self.run_async(**iterable[0])] - if self.context.tqdm: - return await tqdm_asyncio.gather( - *[self.run_async(**i) for i in iterable], desc=self.identifier - ) - return await asyncio.gather(*[self.run_async(**i) for i in iterable]) - return [await self.run_async()] - - def process_async(self, iterable: iterable_input) -> Any: - logger.info("processing content async") - return self.asyncio_run(fn=self._process_async, iterable=iterable) - - def asyncio_run( - self, fn: Callable[[Any, Any], Awaitable[Any]], *args: Any, **kwargs: Any - ) -> Any: - current_loop = asyncio._get_running_loop() - if current_loop is None: - return asyncio.run(fn(*args, **kwargs)) - with ThreadPoolExecutor(thread_name_prefix="asyncio") as thread_pool: - logger.warning( - f"async code being run in dedicated thread pool " - f"to not conflict with existing event loop: {current_loop}" - ) - - def wrapped(): - return asyncio.run(fn(*args, **kwargs)) - - future = thread_pool.submit(wrapped) - return future.result() - - def process_multiprocess(self, iterable: iterable_input) -> Any: - logger.info("processing content across processes") - - if iterable: - if len(iterable) == 1: - return [self.process_serially(iterable)] - if self.context.num_processes == 1: - return self.process_serially(iterable) - with mp.Pool( - processes=self.context.num_processes, - initializer=self._init_logger, - initargs=(logging.DEBUG if self.context.verbose else logging.INFO,), - ) as pool: - if self.context.tqdm: - return list( - tqdm( - pool.imap_unordered(func=self._wrap_mp, iterable=iterable), - total=len(iterable), - desc=self.identifier, - ) - ) - return pool.map(self._wrap_mp, iterable) - return [self.run()] - - def _wrap_mp(self, input_kwargs: dict) -> Any: - # Allow mapping of kwargs via multiprocessing map() - return self.run(**input_kwargs) - - def _init_logger(self, log_level: int): - # Init logger for each spawned process when using multiprocessing pool - make_default_logger(level=log_level) - - @timed - def __call__(self, iterable: Optional[iterable_input] = None) -> Any: - iterable = iterable or [] - if iterable: - logger.info( - f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore - ) - if self.context.async_supported and self.process.is_async(): - return self.process_async(iterable=iterable) - if self.context.mp_supported: - return self.process_multiprocess(iterable=iterable) - return self.process_serially(iterable=iterable) - - def _run(self, fn: Callable, **kwargs: Any) -> Optional[Any]: - return self.asyncio_run(fn=self.run_async, _fn=fn, **kwargs) - - async def _run_async(self, fn: Callable, **kwargs: Any) -> Optional[Any]: - raise NotImplementedError - - def run(self, _fn: Callable[..., Any] | None = None, **kwargs: Any) -> Optional[Any]: - try: - fn = _fn or self.process.run - return self._run(fn=fn, **kwargs) - except Exception as e: - logger.error(f"Exception raised while 
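`asyncio_run` above guards against an already-running event loop (e.g. inside Jupyter) by pushing the coroutine into a fresh thread with its own loop. It checks via the private `asyncio._get_running_loop()`; the public-API equivalent of the same dodge looks roughly like this (a sketch, not the deleted code verbatim):

import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Awaitable, Callable


def run_anywhere(fn: Callable[..., Awaitable[Any]], *args: Any, **kwargs: Any) -> Any:
    """Run an async function whether or not a loop is already running."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        return asyncio.run(fn(*args, **kwargs))  # no loop: the simple path
    # A loop is running in this thread: hand off to a dedicated thread/loop.
    with ThreadPoolExecutor(thread_name_prefix="asyncio") as pool:
        return pool.submit(lambda: asyncio.run(fn(*args, **kwargs))).result()


async def demo() -> str:
    await asyncio.sleep(0)
    return "ok"


print(run_anywhere(demo))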
running {self.identifier}", exc_info=e) - if "file_data_path" in kwargs: - self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)} - if self.context.raise_on_error: - raise e - return None - - async def run_async(self, _fn: Optional[Callable] = None, **kwargs: Any) -> Optional[Any]: - try: - fn = _fn or self.process.run_async - return await self._run_async(fn=fn, **kwargs) - except Exception as e: - logger.error(f"Exception raised while running {self.identifier}", exc_info=e) - if "file_data_path" in kwargs: - self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)} - if self.context.raise_on_error: - raise e - return None - - @property - def cache_dir(self) -> Path: - return Path(self.context.work_dir) / self.identifier diff --git a/unstructured/ingest/v2/pipeline/pipeline.py b/unstructured/ingest/v2/pipeline/pipeline.py deleted file mode 100644 index 93c77dfa0..000000000 --- a/unstructured/ingest/v2/pipeline/pipeline.py +++ /dev/null @@ -1,286 +0,0 @@ -from __future__ import annotations - -import logging -import multiprocessing as mp -from dataclasses import InitVar, dataclass, field -from time import time -from typing import Any, Optional, Union - -from unstructured.ingest.v2.interfaces import ProcessorConfig -from unstructured.ingest.v2.logger import logger, make_default_logger -from unstructured.ingest.v2.pipeline.steps.chunk import Chunker, ChunkStep -from unstructured.ingest.v2.pipeline.steps.download import DownloaderT, DownloadStep -from unstructured.ingest.v2.pipeline.steps.embed import Embedder, EmbedStep -from unstructured.ingest.v2.pipeline.steps.index import IndexerT, IndexStep -from unstructured.ingest.v2.pipeline.steps.partition import Partitioner, PartitionStep -from unstructured.ingest.v2.pipeline.steps.stage import UploadStager, UploadStageStep -from unstructured.ingest.v2.pipeline.steps.uncompress import Uncompressor, UncompressStep -from unstructured.ingest.v2.pipeline.steps.upload import Uploader, UploadStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.chunker import ChunkerConfig -from unstructured.ingest.v2.processes.connector_registry import ( - ConnectionConfig, - DownloaderConfigT, - IndexerConfigT, - UploaderConfigT, - UploadStagerConfigT, - destination_registry, - source_registry, -) -from unstructured.ingest.v2.processes.connectors.local import LocalUploader -from unstructured.ingest.v2.processes.embedder import EmbedderConfig -from unstructured.ingest.v2.processes.partitioner import PartitionerConfig - - -class PipelineError(Exception): - pass - - -@dataclass -class Pipeline: - context: ProcessorConfig - indexer: InitVar[IndexerT] - indexer_step: IndexStep = field(init=False) - downloader: InitVar[DownloaderT] - downloader_step: DownloadStep = field(init=False) - partitioner: InitVar[Partitioner] - partitioner_step: PartitionStep = field(init=False) - chunker: InitVar[Optional[Chunker]] = None - chunker_step: ChunkStep | None = field(init=False, default=None) - embedder: InitVar[Optional[Embedder]] = None - embedder_step: EmbedStep | None = field(init=False, default=None) - stager: InitVar[Optional[UploadStager]] = None - stager_step: UploadStageStep | None = field(init=False, default=None) - uploader: InitVar[Uploader] = field(default=LocalUploader()) - uploader_step: UploadStep | None = field(init=False, default=None) - uncompress_step: UncompressStep | None = field(init=False, default=None) - - def __post_init__( - self, - indexer: IndexerT, - downloader: 
DownloaderT, - partitioner: Partitioner, - chunker: Chunker | None = None, - embedder: Embedder | None = None, - stager: UploadStager | None = None, - uploader: Uploader | None = None, - ): - make_default_logger(level=logging.DEBUG if self.context.verbose else logging.INFO) - self.indexer_step = IndexStep(process=indexer, context=self.context) - self.downloader_step = DownloadStep(process=downloader, context=self.context) - self.partitioner_step = PartitionStep(process=partitioner, context=self.context) - self.chunker_step = ChunkStep(process=chunker, context=self.context) if chunker else None - - self.embedder_step = EmbedStep(process=embedder, context=self.context) if embedder else None - # TODO: support initialize() call from each step process - # Potential long call to download embedder models, run before any fanout: - if embedder and embedder.config: - embedder.config.get_embedder().initialize() - - self.stager_step = UploadStageStep(process=stager, context=self.context) if stager else None - self.uploader_step = UploadStep(process=uploader, context=self.context) - if self.context.uncompress: - process = Uncompressor() - self.uncompress_step = UncompressStep(process=process, context=self.context) - - self.check_destination_connector() - - def check_destination_connector(self): - # Make sure that if the set destination connector expects a stager, one is also set - if not self.uploader_step: - return - uploader_connector_type = self.uploader_step.process.connector_type - registry_entry = destination_registry[uploader_connector_type] - if registry_entry.upload_stager and self.stager_step is None: - raise ValueError( - f"pipeline with uploader type {self.uploader_step.process.__class__.__name__} " - f"expects a stager of type {registry_entry.upload_stager.__name__} " - f"but one was not set" - ) - - def cleanup(self): - pass - - def log_statuses(self): - if status := self.context.status: - logger.error(f"{len(status)} failed documents:") - for k, v in status.items(): - for kk, vv in v.items(): - logger.error(f"{k}: [{kk}] {vv}") - - def run(self): - try: - start_time = time() - self._run() - logger.info(f"Finished ingest process in {time() - start_time}s") - finally: - self.log_statuses() - self.cleanup() - if self.context.status: - raise PipelineError("Pipeline did not run successfully") - - def clean_results(self, results: list[Union[Any, list[Any]]] | None) -> list[Any] | None: - if not results: - return None - results = [r for r in results if r] - flat = [] - for r in results: - if isinstance(r, list): - flat.extend(r) - else: - flat.append(r) - final = [f for f in flat if f] - return final or None - - def _run(self): - logger.info( - f"Running local pipeline: {self} with configs: " - f"{sterilize_dict(self.context.to_dict(redact_sensitive=True))}" - ) - if self.context.mp_supported: - manager = mp.Manager() - self.context.status = manager.dict() - else: - self.context.status = {} - - # Index into data source - indices = self.indexer_step.run() - indices_inputs = [{"file_data_path": i} for i in indices] - if not indices_inputs: - return - - # Download associated content to local file system - downloaded_data = self.downloader_step(indices_inputs) - downloaded_data = self.clean_results(results=downloaded_data) - if not downloaded_data: - return - - # Run uncompress if available - if self.uncompress_step: - downloaded_data = self.uncompress_step(downloaded_data) - # Flatten list of lists - downloaded_data = self.clean_results(results=downloaded_data) - - if not downloaded_data: - return 
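# Illustrative aside, not from the original file: each step's output passes
# through clean_results(), which drops falsy entries and flattens one level of
# nesting, since a step such as uncompress can map one input file to many
# outputs. With the implementation above:
#     clean_results([["a.json", "b.json"], "c.json", None])  # -> ["a.json", "b.json", "c.json"]
#     clean_results([None, []])                              # -> None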
- - # Partition content - elements = self.partitioner_step(downloaded_data) - elements = self.clean_results(results=elements) - if not elements: - return - - # Run element specific modifiers - for step in [self.chunker_step, self.embedder_step, self.stager_step]: - elements = step(elements) if step else elements - elements = self.clean_results(results=elements) - if not elements: - return - - # Upload the final result - self.uploader_step(iterable=elements) - - def __str__(self): - s = [str(self.indexer_step), str(self.downloader_step)] - if uncompress_step := self.uncompress_step: - s.append(str(uncompress_step)) - s.append(str(self.partitioner_step)) - if chunker_step := self.chunker_step: - s.append(str(chunker_step)) - if embedder_step := self.embedder_step: - s.append(str(embedder_step)) - if stager_step := self.stager_step: - s.append(str(stager_step)) - s.append(str(self.uploader_step)) - return " -> ".join(s) - - @classmethod - def from_configs( - cls, - context: ProcessorConfig, - indexer_config: IndexerConfigT, - downloader_config: DownloaderConfigT, - source_connection_config: ConnectionConfig, - partitioner_config: PartitionerConfig, - chunker_config: Optional[ChunkerConfig] = None, - embedder_config: Optional[EmbedderConfig] = None, - destination_connection_config: Optional[ConnectionConfig] = None, - stager_config: Optional[UploadStagerConfigT] = None, - uploader_config: Optional[UploaderConfigT] = None, - ) -> "Pipeline": - # Get registry key based on indexer config - source_entry = { - k: v - for k, v in source_registry.items() - if isinstance(indexer_config, v.indexer_config) - and isinstance(downloader_config, v.downloader_config) - and isinstance(source_connection_config, v.connection_config) - } - if len(source_entry) > 1: - raise ValueError( - f"multiple entries found matching provided indexer, " - f"downloader and connection configs: {source_entry}" - ) - if len(source_entry) != 1: - raise ValueError( - "no entry found in source registry with matching indexer, " - "downloader and connection configs" - ) - source = list(source_entry.values())[0] - pipeline_kwargs = { - "context": context, - "indexer": source.indexer( - index_config=indexer_config, connection_config=source_connection_config - ), - "downloader": source.downloader( - download_config=downloader_config, connection_config=source_connection_config - ), - "partitioner": Partitioner(config=partitioner_config), - } - if chunker_config: - pipeline_kwargs["chunker"] = Chunker(config=chunker_config) - if embedder_config: - pipeline_kwargs["embedder"] = Embedder(config=embedder_config) - if not uploader_config: - return Pipeline(**pipeline_kwargs) - - destination_entry = { - k: v - for k, v in destination_registry.items() - if isinstance(uploader_config, v.uploader_config) - } - if destination_connection_config: - destination_entry = { - k: v - for k, v in destination_entry.items() - if isinstance(destination_connection_config, v.connection_config) - } - if stager_config: - destination_entry = { - k: v - for k, v in destination_entry.items() - if isinstance(stager_config, v.upload_stager_config) - } - - if len(destination_entry) > 1: - raise ValueError( - f"multiple entries found matching provided uploader, " - f"stager and connection configs: {destination_entry}" - ) - if len(destination_entry) != 1: - raise ValueError( - "no entry found in source registry with matching uploader, " - "stager and connection configs" - ) - - destination = list(destination_entry.values())[0] - if stager_config: - 
pipeline_kwargs["stager"] = destination.upload_stager( - upload_stager_config=stager_config - ) - if uploader_config: - uploader_kwargs = {"upload_config": uploader_config} - if destination_connection_config: - uploader_kwargs["connection_config"] = destination_connection_config - pipeline_kwargs["uploader"] = destination.uploader(**uploader_kwargs) - return cls(**pipeline_kwargs) diff --git a/unstructured/ingest/v2/pipeline/steps/__init__.py b/unstructured/ingest/v2/pipeline/steps/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/unstructured/ingest/v2/pipeline/steps/chunk.py b/unstructured/ingest/v2/pipeline/steps/chunk.py deleted file mode 100644 index b2e5d14c2..000000000 --- a/unstructured/ingest/v2/pipeline/steps/chunk.py +++ /dev/null @@ -1,84 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.chunker import Chunker -from unstructured.staging.base import elements_to_dicts - -STEP_ID = "chunk" - - -class ChunkStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class ChunkStep(PipelineStep): - process: Chunker - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.config.chunking_strategy})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - if self.process.config - else None - ) - logger.info(f"Created {self.identifier} with configs: {config}") - - def should_chunk(self, filepath: Path, file_data: FileData) -> bool: - if self.context.reprocess or file_data.reprocess: - return True - return not filepath.exists() - - def get_output_filepath(self, filename: Path) -> Path: - hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json" - filepath = (self.cache_dir / hashed_output_file).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - return filepath - - def _save_output(self, output_filepath: str, chunked_content: list[dict]): - with open(str(output_filepath), "w") as f: - logger.debug(f"Writing chunker output to: {output_filepath}") - json.dump(chunked_content, f, indent=2) - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str, **kwargs - ) -> ChunkStepResponse: - path = Path(path) - file_data = FileData.from_file(path=file_data_path) - output_filepath = self.get_output_filepath(filename=path) - if not self.should_chunk(filepath=output_filepath, file_data=file_data): - logger.debug(f"Skipping chunking, output already exists: {output_filepath}") - return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - fn_kwargs = {"elements_filepath": path} - if not asyncio.iscoroutinefunction(fn): - chunked_content_raw = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - chunked_content_raw = await fn(**fn_kwargs) - else: - chunked_content_raw = await fn(**fn_kwargs) - self._save_output( - output_filepath=str(output_filepath), - chunked_content=elements_to_dicts(chunked_content_raw), - ) - return ChunkStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = 
json.dumps( - self.process.config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/download.py b/unstructured/ingest/v2/pipeline/steps/download.py deleted file mode 100644 index 84d00e35d..000000000 --- a/unstructured/ingest/v2/pipeline/steps/download.py +++ /dev/null @@ -1,124 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from typing import Callable, Optional, TypedDict, TypeVar - -from unstructured.ingest.v2.interfaces import FileData, download_responses -from unstructured.ingest.v2.interfaces.downloader import Downloader -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -DownloaderT = TypeVar("DownloaderT", bound=Downloader) - -STEP_ID = "download" - - -class DownloadStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class DownloadStep(PipelineStep): - process: DownloaderT - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.download_config.to_dict(redact_sensitive=True)) - if self.process.download_config - else None - ) - connection_config = ( - sterilize_dict(self.process.connection_config.to_dict(redact_sensitive=True)) - if self.process.connection_config - else None - ) - logger.info( - f"Created {self.identifier} with configs: {config}, " - f"connection configs: {connection_config}" - ) - - @staticmethod - def is_float(value: str): - try: - float(value) - return True - except ValueError: - return False - - def should_download(self, file_data: FileData, file_data_path: str) -> bool: - if self.context.re_download: - return True - download_path = self.process.get_download_path(file_data=file_data) - if not download_path or not download_path.exists(): - return True - if ( - download_path.is_file() - and file_data.metadata.date_modified - and self.is_float(file_data.metadata.date_modified) - and download_path.stat().st_mtime > float(file_data.metadata.date_modified) - ): - # Also update file data to mark this to reprocess since this won't change the filename - file_data.reprocess = True - file_data.to_file(path=file_data_path) - return True - return False - - async def _run_async(self, fn: Callable, file_data_path: str) -> list[DownloadStepResponse]: - file_data = FileData.from_file(path=file_data_path) - download_path = self.process.get_download_path(file_data=file_data) - if not self.should_download(file_data=file_data, file_data_path=file_data_path): - logger.debug(f"Skipping download, file already exists locally: {download_path}") - return [DownloadStepResponse(file_data_path=file_data_path, path=str(download_path))] - fn_kwargs = {"file_data": file_data} - if not asyncio.iscoroutinefunction(fn): - download_results = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - download_results = await fn(**fn_kwargs) - else: - download_results = await fn(**fn_kwargs) - return self.create_step_results( - current_file_data_path=file_data_path, download_results=download_results - ) - - def create_step_results( - self, current_file_data_path: str, download_results: download_responses - ) -> list[DownloadStepResponse]: - if not isinstance(download_results, list): 
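# Illustrative note, not from the original file: a downloader may return a
# single response for the current file, or a list of responses when one
# download fans out into several documents. The single-response case reuses
# the existing file_data JSON; each fan-out entry instead gets its own
# file_data persisted via persist_new_file_data() below.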
- return [ - DownloadStepResponse( - file_data_path=current_file_data_path, path=str(download_results["path"]) - ) - ] - # Supplemental results generated as part of the download process - download_step_results = [] - for res in download_results: - file_data_path = self.persist_new_file_data(file_data=res["file_data"]) - download_step_results.append( - DownloadStepResponse(file_data_path=file_data_path, path=res["path"]) - ) - return download_step_results - - def persist_new_file_data(self, file_data: FileData) -> str: - record_hash = self.get_hash(extras=[file_data.identifier]) - filename = f"{record_hash}.json" - filepath = (self.cache_dir / filename).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - with open(str(filepath), "w") as f: - json.dump(file_data.to_dict(), f, indent=2) - return str(filepath) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - sterilize_dict(self.process.download_config.to_dict()), sort_keys=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/embed.py b/unstructured/ingest/v2/pipeline/steps/embed.py deleted file mode 100644 index 94103951c..000000000 --- a/unstructured/ingest/v2/pipeline/steps/embed.py +++ /dev/null @@ -1,83 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.embedder import Embedder -from unstructured.staging.base import elements_to_dicts - -STEP_ID = "embed" - - -class EmbedStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class EmbedStep(PipelineStep): - process: Embedder - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.config.embedding_provider})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - if self.process.config - else None - ) - logger.info(f"Created {self.identifier} with configs: {config}") - - def should_embed(self, filepath: Path, file_data: FileData) -> bool: - if self.context.reprocess or file_data.reprocess: - return True - return not filepath.exists() - - def get_output_filepath(self, filename: Path) -> Path: - hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json" - filepath = (self.cache_dir / hashed_output_file).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - return filepath - - def _save_output(self, output_filepath: str, embedded_content: list[dict]): - with open(str(output_filepath), "w") as f: - logger.debug(f"Writing embedded output to: {output_filepath}") - json.dump(embedded_content, f, indent=2) - - async def _run_async(self, fn: Callable, path: str, file_data_path: str) -> EmbedStepResponse: - path = Path(path) - file_data = FileData.from_file(path=file_data_path) - output_filepath = self.get_output_filepath(filename=path) - if not self.should_embed(filepath=output_filepath, file_data=file_data): - logger.debug(f"Skipping embedding, output already exists: {output_filepath}") - return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - fn_kwargs = 
{"elements_filepath": path} - if not asyncio.iscoroutinefunction(fn): - embed_content_raw = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - embed_content_raw = await fn(**fn_kwargs) - else: - embed_content_raw = await fn(**fn_kwargs) - - self._save_output( - output_filepath=str(output_filepath), - embedded_content=elements_to_dicts(embed_content_raw), - ) - return EmbedStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - self.process.config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/index.py b/unstructured/ingest/v2/pipeline/steps/index.py deleted file mode 100644 index d91a035ab..000000000 --- a/unstructured/ingest/v2/pipeline/steps/index.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import annotations - -import hashlib -import json -from dataclasses import dataclass -from typing import Any, Callable, Generator, Optional, TypeVar - -from unstructured.ingest.v2.interfaces.indexer import Indexer -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -IndexerT = TypeVar("IndexerT", bound=Indexer) - -STEP_ID = "index" - - -@dataclass -class IndexStep(PipelineStep): - process: IndexerT - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.index_config.to_dict(redact_sensitive=True)) - if self.process.index_config - else None - ) - connection_config = ( - sterilize_dict(self.process.connection_config.to_dict(redact_sensitive=True)) - if self.process.connection_config - else None - ) - logger.info( - f"Created {self.identifier} with configs: {config}, " - f"connection configs: {connection_config}" - ) - - def run( - self, _fn: Callable[..., Any] | None = None, **kwargs: Any - ) -> Generator[str, None, None]: - for file_data in self.process.run(): - logger.debug(f"Generated file data: {file_data}") - try: - record_hash = self.get_hash(extras=[file_data.identifier]) - filename = f"{record_hash}.json" - filepath = (self.cache_dir / filename).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - with open(str(filepath), "w") as f: - json.dump(file_data.to_dict(), f, indent=2) - yield str(filepath) - except Exception as e: - logger.error(f"failed to create index for file data: {file_data}", exc_info=True) - if self.context.raise_on_error: - raise e - continue - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps(self.process.index_config.to_dict()) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/partition.py b/unstructured/ingest/v2/pipeline/steps/partition.py deleted file mode 100644 index 541d2cae9..000000000 --- a/unstructured/ingest/v2/pipeline/steps/partition.py +++ /dev/null @@ -1,78 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.logger import 
logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.partitioner import Partitioner - -STEP_ID = "partition" - - -class PartitionStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class PartitionStep(PipelineStep): - process: Partitioner - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.config.strategy})" - - def __post_init__(self): - config = sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - logger.info(f"Created {self.identifier} with configs: {config}") - - def should_partition(self, filepath: Path, file_data: FileData) -> bool: - if self.context.reprocess or file_data.reprocess: - return True - return not filepath.exists() - - def get_output_filepath(self, filename: Path) -> Path: - hashed_output_file = f"{self.get_hash(extras=[filename.name])}.json" - filepath = (self.cache_dir / hashed_output_file).resolve() - filepath.parent.mkdir(parents=True, exist_ok=True) - return filepath - - def _save_output(self, output_filepath: str, partitioned_content: list[dict]): - with open(str(output_filepath), "w") as f: - logger.debug(f"Writing partitioned output to: {output_filepath}") - json.dump(partitioned_content, f, indent=2) - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str - ) -> Optional[PartitionStepResponse]: - path = Path(path) - file_data = FileData.from_file(path=file_data_path) - output_filepath = self.get_output_filepath(filename=Path(file_data_path)) - if not self.should_partition(filepath=output_filepath, file_data=file_data): - logger.debug(f"Skipping partitioning, output already exists: {output_filepath}") - return PartitionStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - fn_kwargs = {"filename": path, "metadata": file_data.metadata} - if not asyncio.iscoroutinefunction(fn): - partitioned_content = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - partitioned_content = await fn(**fn_kwargs) - else: - partitioned_content = await fn(**fn_kwargs) - self._save_output( - output_filepath=str(output_filepath), partitioned_content=partitioned_content - ) - return PartitionStepResponse(file_data_path=file_data_path, path=str(output_filepath)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - self.process.config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/stage.py b/unstructured/ingest/v2/pipeline/steps/stage.py deleted file mode 100644 index b4c6204ad..000000000 --- a/unstructured/ingest/v2/pipeline/steps/stage.py +++ /dev/null @@ -1,64 +0,0 @@ -import asyncio -import hashlib -import json -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.interfaces.upload_stager import UploadStager -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -STEP_ID = "upload_stage" - - -class UploadStageStepResponse(TypedDict): - file_data_path: str - path: str - - -@dataclass -class 
UploadStageStep(PipelineStep): - process: UploadStager - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.upload_stager_config.to_dict(redact_sensitive=True)) - if self.process.upload_stager_config - else None - ) - self.cache_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Created {self.identifier} with configs: {config}") - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str - ) -> UploadStageStepResponse: - path = Path(path) - fn_kwargs = { - "elements_filepath": path, - "file_data": FileData.from_file(path=file_data_path), - "output_dir": self.cache_dir, - "output_filename": self.get_hash(extras=[path.name]), - } - if not asyncio.iscoroutinefunction(fn): - staged_output_path = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - staged_output_path = await fn(**fn_kwargs) - else: - staged_output_path = await fn(**fn_kwargs) - return UploadStageStepResponse(file_data_path=file_data_path, path=str(staged_output_path)) - - def get_hash(self, extras: Optional[list[str]]) -> str: - hashable_string = json.dumps( - self.process.upload_stager_config.to_dict(), sort_keys=True, ensure_ascii=True - ) - if extras: - hashable_string += "".join(extras) - return hashlib.sha256(hashable_string.encode()).hexdigest()[:12] diff --git a/unstructured/ingest/v2/pipeline/steps/uncompress.py b/unstructured/ingest/v2/pipeline/steps/uncompress.py deleted file mode 100644 index 987c9d5f6..000000000 --- a/unstructured/ingest/v2/pipeline/steps/uncompress.py +++ /dev/null @@ -1,68 +0,0 @@ -import asyncio -from pathlib import Path -from typing import Callable, TypedDict - -from unstructured.ingest.v2.interfaces.file_data import FileData -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep -from unstructured.ingest.v2.pipeline.utils import sterilize_dict -from unstructured.ingest.v2.processes.uncompress import Uncompressor - -STEP_ID = "uncompress" - - -class UncompressStepResponse(TypedDict): - file_data_path: str - path: str - - -class UncompressStep(PipelineStep): - process: Uncompressor - identifier: str = STEP_ID - - def __post_init__(self): - config = ( - sterilize_dict(self.process.config.to_dict(redact_sensitive=True)) - if self.process.config - else None - ) - logger.info(f"Created {self.identifier} with configs: {config}") - - def _run(self, path: str, file_data_path: str) -> list[UncompressStepResponse]: - file_data = FileData.from_file(path=file_data_path) - new_file_data = self.process.run(file_data=file_data) - responses = [] - for new_file in new_file_data: - new_file_data_path = Path(file_data_path).parent / f"{new_file.identifier}.json" - new_file.to_file(path=str(new_file_data_path.resolve())) - responses.append( - UncompressStepResponse( - path=new_file.source_identifiers.fullpath, - file_data_path=str(new_file_data_path), - ) - ) - return responses - - async def _run_async( - self, fn: Callable, path: str, file_data_path: str - ) -> list[UncompressStepResponse]: - file_data = FileData.from_file(path=file_data_path) - fn_kwargs = {"file_data": file_data} - if not asyncio.iscoroutinefunction(fn): - new_file_data = fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - new_file_data = await fn(**fn_kwargs) - else: - new_file_data = await fn(**fn_kwargs) - responses = [] - for new_file in new_file_data: - 
new_file_data_path = Path(file_data_path).parent / f"{new_file.identifier}.json" - new_file.to_file(path=str(new_file_data_path.resolve())) - responses.append( - UncompressStepResponse( - path=new_file.source_identifiers.fullpath, - file_data_path=str(new_file_data_path), - ) - ) - return responses diff --git a/unstructured/ingest/v2/pipeline/steps/upload.py b/unstructured/ingest/v2/pipeline/steps/upload.py deleted file mode 100644 index dc58d46ac..000000000 --- a/unstructured/ingest/v2/pipeline/steps/upload.py +++ /dev/null @@ -1,73 +0,0 @@ -import asyncio -from dataclasses import dataclass -from pathlib import Path -from typing import Callable, Optional, TypedDict - -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.interfaces.uploader import UploadContent, Uploader -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.pipeline.interfaces import PipelineStep, iterable_input, timed -from unstructured.ingest.v2.pipeline.utils import sterilize_dict - -STEP_ID = "upload" - - -class UploadStepContent(TypedDict): - path: str - file_data_path: str - - -@dataclass -class UploadStep(PipelineStep): - process: Uploader - identifier: str = STEP_ID - - def __str__(self): - return f"{self.identifier} ({self.process.__class__.__name__})" - - def __post_init__(self): - config = ( - sterilize_dict(self.process.upload_config.to_dict(redact_sensitive=True)) - if self.process.upload_config - else None - ) - connection_config = ( - sterilize_dict(self.process.connection_config.to_dict(redact_sensitive=True)) - if self.process.connection_config - else None - ) - logger.info( - f"Created {self.identifier} with configs: {config}, " - f"connection configs: {connection_config}" - ) - - def process_whole(self, iterable: iterable_input): - self.run(contents=iterable) - - @timed - def __call__(self, iterable: iterable_input): - logger.info( - f"Calling {self.__class__.__name__} " f"with {len(iterable)} docs", # type: ignore - ) - if self.process.is_async(): - self.process_async(iterable=iterable) - else: - self.process_whole(iterable=iterable) - - def _run(self, fn: Callable, contents: list[UploadStepContent]): - upload_contents = [ - UploadContent(path=Path(c["path"]), file_data=FileData.from_file(c["file_data_path"])) - for c in contents - ] - fn(contents=upload_contents) - - async def _run_async(self, path: str, file_data_path: str, fn: Optional[Callable] = None): - fn = fn or self.process.run_async - fn_kwargs = {"path": Path(path), "file_data": FileData.from_file(path=file_data_path)} - if not asyncio.iscoroutinefunction(fn): - fn(**fn_kwargs) - elif semaphore := self.context.semaphore: - async with semaphore: - await fn(**fn_kwargs) - else: - await fn(**fn_kwargs) diff --git a/unstructured/ingest/v2/pipeline/utils.py b/unstructured/ingest/v2/pipeline/utils.py deleted file mode 100644 index e684ebb10..000000000 --- a/unstructured/ingest/v2/pipeline/utils.py +++ /dev/null @@ -1,16 +0,0 @@ -import json -from datetime import datetime -from pathlib import Path -from typing import Any - - -def sterilize_dict(data: dict[str, Any]) -> dict[str, Any]: - def json_serial(obj: Any) -> str: - if isinstance(obj, Path): - return obj.as_posix() - if isinstance(obj, datetime): - return obj.isoformat() - raise TypeError("Type %s not serializable" % type(obj)) - - data_s = json.dumps(data, default=json_serial) - return json.loads(data_s) diff --git a/unstructured/ingest/v2/processes/__init__.py b/unstructured/ingest/v2/processes/__init__.py deleted file mode 100644 index 
e69de29bb..000000000 diff --git a/unstructured/ingest/v2/processes/chunker.py b/unstructured/ingest/v2/processes/chunker.py deleted file mode 100644 index 11dffb073..000000000 --- a/unstructured/ingest/v2/processes/chunker.py +++ /dev/null @@ -1,96 +0,0 @@ -from abc import ABC -from dataclasses import dataclass, fields -from pathlib import Path -from typing import Any, Optional - -from unstructured.chunking import dispatch -from unstructured.documents.elements import Element, assign_and_map_hash_ids -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.v2.interfaces.process import BaseProcess -from unstructured.ingest.v2.logger import logger -from unstructured.staging.base import dict_to_elements, elements_from_json - - -@dataclass -class ChunkerConfig(EnhancedDataClassJsonMixin): - chunking_strategy: Optional[str] = None - chunking_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" - chunk_by_api: bool = False - chunk_api_key: Optional[str] = enhanced_field(default=None, sensitive=True) - - chunk_combine_text_under_n_chars: Optional[int] = None - chunk_include_orig_elements: Optional[bool] = None - chunk_max_characters: Optional[int] = None - chunk_multipage_sections: Optional[bool] = None - chunk_new_after_n_chars: Optional[int] = None - chunk_overlap: Optional[int] = None - chunk_overlap_all: Optional[bool] = None - - def to_chunking_kwargs(self) -> dict[str, Any]: - return { - "chunking_strategy": self.chunking_strategy, - "combine_under_n_chars": self.chunk_combine_text_under_n_chars, - "max_characters": self.chunk_max_characters, - "include_orig_elements": self.chunk_include_orig_elements, - "multipage_sections": self.chunk_multipage_sections, - "new_after_n_chars": self.chunk_new_after_n_chars, - "overlap": self.chunk_overlap, - "overlap_all": self.chunk_overlap_all, - } - - -@dataclass -class Chunker(BaseProcess, ABC): - config: ChunkerConfig - - def is_async(self) -> bool: - return self.config.chunk_by_api - - def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]: - elements = elements_from_json(filename=str(elements_filepath)) - if not elements: - return elements - local_chunking_strategies = ("basic", "by_title") - if self.config.chunking_strategy not in local_chunking_strategies: - logger.warning( - "chunking strategy not supported for local chunking: {}, must be one of: {}".format( - self.config.chunking_strategy, ", ".join(local_chunking_strategies) - ) - ) - return elements - chunked_elements = dispatch.chunk(elements=elements, **self.config.to_chunking_kwargs()) - assign_and_map_hash_ids(chunked_elements) - return chunked_elements - - async def run_async(self, elements_filepath: Path, **kwargs: Any) -> list[Element]: - from unstructured_client import UnstructuredClient - from unstructured_client.models.shared import Files, PartitionParameters - - client = UnstructuredClient( - api_key_auth=self.config.chunk_api_key, - server_url=self.config.chunking_endpoint, - ) - partition_request = self.config.to_chunking_kwargs() - possible_fields = [f.name for f in fields(PartitionParameters)] - filtered_partition_request = { - k: v for k, v in partition_request.items() if k in possible_fields - } - if len(filtered_partition_request) != len(partition_request): - logger.debug( - "Following fields were omitted due to not being " - "supported by the currently used unstructured client: {}".format( - ", ".join([v for v in partition_request if v not in filtered_partition_request]) - ) 
- ) - with open(elements_filepath, "rb") as f: - files = Files( - content=f.read(), - file_name=str(elements_filepath.resolve()), - ) - filtered_partition_request["files"] = files - partition_params = PartitionParameters(**filtered_partition_request) - resp = client.general.partition(partition_params) - elements_raw = resp.elements or [] - elements = dict_to_elements(elements_raw) - assign_and_map_hash_ids(elements) - return elements diff --git a/unstructured/ingest/v2/processes/connector_registry.py b/unstructured/ingest/v2/processes/connector_registry.py deleted file mode 100644 index 41abdd4c8..000000000 --- a/unstructured/ingest/v2/processes/connector_registry.py +++ /dev/null @@ -1,63 +0,0 @@ -from dataclasses import dataclass -from typing import Optional, Type, TypeVar - -from unstructured.ingest.v2.interfaces import ( - ConnectionConfig, - Downloader, - DownloaderConfig, - Indexer, - IndexerConfig, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) - -IndexerT = TypeVar("IndexerT", bound=Indexer) -IndexerConfigT = TypeVar("IndexerConfigT", bound=IndexerConfig) -DownloaderT = TypeVar("DownloaderT", bound=Downloader) -DownloaderConfigT = TypeVar("DownloaderConfigT", bound=DownloaderConfig) -ConnectionConfigT = TypeVar("ConnectionConfigT", bound=ConnectionConfig) -UploadStagerConfigT = TypeVar("UploadStagerConfigT", bound=UploadStagerConfig) -UploadStagerT = TypeVar("UploadStagerT", bound=UploadStager) -UploaderConfigT = TypeVar("UploaderConfigT", bound=UploaderConfig) -UploaderT = TypeVar("UploaderT", bound=Uploader) - - -@dataclass -class SourceRegistryEntry: - indexer: Type[IndexerT] - downloader: Type[DownloaderT] - - downloader_config: Optional[Type[DownloaderConfigT]] = None - indexer_config: Optional[Type[IndexerConfigT]] = None - connection_config: Optional[Type[ConnectionConfigT]] = None - - -source_registry: dict[str, SourceRegistryEntry] = {} - - -def add_source_entry(source_type: str, entry: SourceRegistryEntry): - if source_type in source_registry: - raise ValueError(f"source {source_type} has already been registered") - source_registry[source_type] = entry - - -@dataclass -class DestinationRegistryEntry: - uploader: Type[UploaderT] - upload_stager: Optional[Type[UploadStagerT]] = None - - upload_stager_config: Optional[Type[UploadStagerConfigT]] = None - uploader_config: Optional[Type[UploaderConfigT]] = None - - connection_config: Optional[Type[ConnectionConfigT]] = None - - -destination_registry: dict[str, DestinationRegistryEntry] = {} - - -def add_destination_entry(destination_type: str, entry: DestinationRegistryEntry): - if destination_type in destination_registry: - raise ValueError(f"destination {destination_type} has already been registered") - destination_registry[destination_type] = entry diff --git a/unstructured/ingest/v2/processes/connectors/__init__.py b/unstructured/ingest/v2/processes/connectors/__init__.py deleted file mode 100644 index 5e4e2cf13..000000000 --- a/unstructured/ingest/v2/processes/connectors/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import annotations - -import unstructured.ingest.v2.processes.connectors.fsspec # noqa: F401 -from unstructured.ingest.v2.processes.connector_registry import ( - add_destination_entry, - add_source_entry, -) - -from .astradb import CONNECTOR_TYPE as ASTRADB_CONNECTOR_TYPE -from .astradb import astradb_destination_entry -from .chroma import CONNECTOR_TYPE as CHROMA_CONNECTOR_TYPE -from .chroma import chroma_destination_entry -from .databricks_volumes import CONNECTOR_TYPE as 
DATABRICKS_VOLUMES_CONNECTOR_TYPE -from .databricks_volumes import databricks_volumes_destination_entry -from .elasticsearch import CONNECTOR_TYPE as ELASTICSEARCH_CONNECTOR_TYPE -from .elasticsearch import elasticsearch_destination_entry, elasticsearch_source_entry -from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE -from .google_drive import google_drive_source_entry -from .local import CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE -from .local import local_destination_entry, local_source_entry -from .mongodb import CONNECTOR_TYPE as MONGODB_CONNECTOR_TYPE -from .mongodb import mongodb_destination_entry -from .onedrive import CONNECTOR_TYPE as ONEDRIVE_CONNECTOR_TYPE -from .onedrive import onedrive_source_entry -from .opensearch import CONNECTOR_TYPE as OPENSEARCH_CONNECTOR_TYPE -from .opensearch import opensearch_destination_entry, opensearch_source_entry -from .pinecone import CONNECTOR_TYPE as PINECONE_CONNECTOR_TYPE -from .pinecone import pinecone_destination_entry -from .salesforce import CONNECTOR_TYPE as SALESFORCE_CONNECTOR_TYPE -from .salesforce import salesforce_source_entry -from .sharepoint import CONNECTOR_TYPE as SHAREPOINT_CONNECTOR_TYPE -from .sharepoint import sharepoint_source_entry -from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE -from .singlestore import singlestore_destination_entry -from .sql import CONNECTOR_TYPE as SQL_CONNECTOR_TYPE -from .sql import sql_destination_entry -from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE -from .weaviate import weaviate_destination_entry - -add_destination_entry(destination_type=ASTRADB_CONNECTOR_TYPE, entry=astradb_destination_entry) - -add_destination_entry(destination_type=CHROMA_CONNECTOR_TYPE, entry=chroma_destination_entry) - -add_source_entry(source_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_source_entry) -add_destination_entry( - destination_type=ELASTICSEARCH_CONNECTOR_TYPE, entry=elasticsearch_destination_entry -) - -add_source_entry(source_type=GOOGLE_DRIVE_CONNECTOR_TYPE, entry=google_drive_source_entry) - -add_source_entry(source_type=LOCAL_CONNECTOR_TYPE, entry=local_source_entry) -add_destination_entry(destination_type=LOCAL_CONNECTOR_TYPE, entry=local_destination_entry) - -add_source_entry(source_type=ONEDRIVE_CONNECTOR_TYPE, entry=onedrive_source_entry) - -add_source_entry(source_type=OPENSEARCH_CONNECTOR_TYPE, entry=opensearch_source_entry) -add_destination_entry( - destination_type=OPENSEARCH_CONNECTOR_TYPE, entry=opensearch_destination_entry -) - -add_source_entry(source_type=SALESFORCE_CONNECTOR_TYPE, entry=salesforce_source_entry) - -add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_destination_entry) - -add_destination_entry( - destination_type=DATABRICKS_VOLUMES_CONNECTOR_TYPE, entry=databricks_volumes_destination_entry -) - -add_destination_entry(destination_type=SQL_CONNECTOR_TYPE, entry=sql_destination_entry) - -add_destination_entry(destination_type=MONGODB_CONNECTOR_TYPE, entry=mongodb_destination_entry) -add_destination_entry(destination_type=PINECONE_CONNECTOR_TYPE, entry=pinecone_destination_entry) -add_source_entry(source_type=SHAREPOINT_CONNECTOR_TYPE, entry=sharepoint_source_entry) -add_destination_entry( - destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry -) diff --git a/unstructured/ingest/v2/processes/connectors/astradb.py b/unstructured/ingest/v2/processes/connectors/astradb.py deleted file mode 100644 index dc10862e8..000000000 --- 
a/unstructured/ingest/v2/processes/connectors/astradb.py +++ /dev/null @@ -1,151 +0,0 @@ -import json -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured import __name__ as integration_name -from unstructured.__version__ import __version__ as integration_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from astrapy.db import AstraDBCollection - -CONNECTOR_TYPE = "astradb" - - -@dataclass -class AstraDBAccessConfig(AccessConfig): - token: str - api_endpoint: str - - -@dataclass -class AstraDBConnectionConfig(ConnectionConfig): - connection_type: str = CONNECTOR_TYPE - access_config: AstraDBAccessConfig = enhanced_field(sensitive=True) - - -@dataclass -class AstraDBUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class AstraDBUploadStager(UploadStager): - upload_stager_config: AstraDBUploadStagerConfig = field( - default_factory=lambda: AstraDBUploadStagerConfig() - ) - - def conform_dict(self, element_dict: dict) -> dict: - return { - "$vector": element_dict.pop("embeddings", None), - "content": element_dict.pop("text", None), - "metadata": element_dict, - } - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - conformed_elements = [] - for element in elements_contents: - conformed_elements.append(self.conform_dict(element_dict=element)) - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class AstraDBUploaderConfig(UploaderConfig): - collection_name: str - embedding_dimension: int - namespace: Optional[str] = None - requested_indexing_policy: Optional[dict[str, Any]] = None - batch_size: int = 20 - - -@dataclass -class AstraDBUploader(Uploader): - connection_config: AstraDBConnectionConfig - upload_config: AstraDBUploaderConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["astrapy"], extras="astradb") - def get_collection(self) -> "AstraDBCollection": - from astrapy.db import AstraDB - - # Get the collection_name and embedding dimension - collection_name = self.upload_config.collection_name - embedding_dimension = self.upload_config.embedding_dimension - requested_indexing_policy = self.upload_config.requested_indexing_policy - - # If the user has requested an indexing policy, pass it to the Astra DB - options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None - - # Build the Astra DB object. 
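# Illustrative note, not from the original file: token and api_endpoint are
# read from AstraDBAccessConfig, which is declared with sensitive=True, so
# the pipeline steps redact these values when logging connection configs.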
- # caller_name/version for Astra DB tracking - astra_db = AstraDB( - api_endpoint=self.connection_config.access_config.api_endpoint, - token=self.connection_config.access_config.token, - namespace=self.upload_config.namespace, - caller_name=integration_name, - caller_version=integration_version, - ) - - # Create and connect to the newly created collection - astra_db_collection = astra_db.create_collection( - collection_name=collection_name, - dimension=embedding_dimension, - options=options, - ) - return astra_db_collection - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"collection {self.upload_config.collection_name}" - ) - - astra_batch_size = self.upload_config.batch_size - collection = self.get_collection() - - for chunk in batch_generator(elements_dict, astra_batch_size): - collection.insert_many(chunk) - - -astradb_destination_entry = DestinationRegistryEntry( - connection_config=AstraDBConnectionConfig, - upload_stager_config=AstraDBUploadStagerConfig, - upload_stager=AstraDBUploadStager, - uploader_config=AstraDBUploaderConfig, - uploader=AstraDBUploader, -) diff --git a/unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py b/unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py deleted file mode 100644 index aab7cfba4..000000000 --- a/unstructured/ingest/v2/processes/connectors/azure_cognitive_search.py +++ /dev/null @@ -1,208 +0,0 @@ -import json -import typing as t -import uuid -from dataclasses import dataclass, field -from pathlib import Path - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError, WriteError -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - add_destination_entry, -) -from unstructured.ingest.v2.processes.connectors.utils import parse_datetime -from unstructured.utils import requires_dependencies - -if t.TYPE_CHECKING: - from azure.search.documents import SearchClient - - -CONNECTOR_TYPE = "azure_cognitive_search" - - -@dataclass -class AzureCognitiveSearchAccessConfig(AccessConfig): - key: t.Optional[str] = enhanced_field(default=None, overload_name="azure_cognitive_search_key") - - -@dataclass -class AzureCognitiveSearchConnectionConfig(ConnectionConfig): - endpoint: str - index: str - access_config: AzureCognitiveSearchAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["azure.search", "azure.core"], extras="azure-cognitive-search") - def generate_client(self) -> "SearchClient": - from azure.core.credentials import AzureKeyCredential - from azure.search.documents import SearchClient - - return SearchClient( - endpoint=self.endpoint, - index_name=self.index, - credential=AzureKeyCredential(self.access_config.key), - ) - - -@dataclass -class AzureCognitiveSearchUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class AzureCognitiveSearchUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class 
AzureCognitiveSearchUploadStager(UploadStager): - upload_stager_config: AzureCognitiveSearchUploadStagerConfig = field( - default_factory=lambda: AzureCognitiveSearchUploadStagerConfig() - ) - - @staticmethod - def conform_dict(data: dict) -> dict: - """ - updates the dictionary that is from each Element being converted into a dict/json - into a dictionary that conforms to the schema expected by the - Azure Cognitive Search index - """ - - data["id"] = str(uuid.uuid4()) - - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = json.dumps(points) - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - data["metadata"]["data_source"]["record_locator"] = json.dumps(record_locator) - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - if links := data.get("metadata", {}).get("links"): - data["metadata"]["links"] = [json.dumps(link) for link in links] - if last_modified := data.get("metadata", {}).get("last_modified"): - data["metadata"]["last_modified"] = parse_datetime(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ" - ) - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = parse_datetime(date_created).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ" - ) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = parse_datetime( - date_modified - ).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = parse_datetime( - date_processed - ).strftime("%Y-%m-%dT%H:%M:%S.%fZ") - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - return data - - def run( - self, - elements_filepath: Path, - output_dir: Path, - output_filename: str, - **kwargs: t.Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - - conformed_elements = [self.conform_dict(data=element) for element in elements_contents] - - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class AzureCognitiveSearchUploader(Uploader): - upload_config: AzureCognitiveSearchUploaderConfig - connection_config: AzureCognitiveSearchConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @DestinationConnectionError.wrap - @requires_dependencies(["azure"], extras="azure-cognitive-search") - def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None: - import azure.core.exceptions - - logger.info( - f"writing {len(elements_dict)} documents to destination " - f"index at {self.connection_config.index}", - ) - try: - results = self.connection_config.generate_client().upload_documents( - documents=elements_dict - ) - - except azure.core.exceptions.HttpResponseError as http_error: - raise WriteError(f"http error: {http_error}") from http_error - errors = [] - success = [] - for result in 
results: - if result.succeeded: - success.append(result) - else: - errors.append(result) - logger.debug(f"results: {len(success)} successes, {len(errors)} failures") - if errors: - raise WriteError( - ", ".join( - [ - f"{error.key}: [{error.status_code}] {error.error_message}" - for error in errors - ], - ), - ) - - def write_dict_wrapper(self, elements_dict): - return self.write_dict(elements_dict=elements_dict) - - def run(self, contents: list[UploadContent], **kwargs: t.Any) -> None: - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing document batches to destination" - f" endpoint at {str(self.connection_config.endpoint)}" - f" index at {str(self.connection_config.index)}" - f" with batch size {str(self.upload_config.batch_size)}" - ) - - batch_size = self.upload_config.batch_size - - for chunk in batch_generator(elements_dict, batch_size): - self.write_dict(elements_dict=chunk) # noqa: E203 - - -add_destination_entry( - destination_type=CONNECTOR_TYPE, - entry=DestinationRegistryEntry( - connection_config=AzureCognitiveSearchConnectionConfig, - uploader=AzureCognitiveSearchUploader, - uploader_config=AzureCognitiveSearchUploaderConfig, - upload_stager=AzureCognitiveSearchUploadStager, - upload_stager_config=AzureCognitiveSearchUploadStagerConfig, - ), -) diff --git a/unstructured/ingest/v2/processes/connectors/chroma.py b/unstructured/ingest/v2/processes/connectors/chroma.py deleted file mode 100644 index e28e3d7f7..000000000 --- a/unstructured/ingest/v2/processes/connectors/chroma.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import annotations - -import json -import uuid -from dataclasses import dataclass, field -from datetime import date, datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Optional - -from chromadb.config import Settings -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from chromadb.api import ClientAPI - -CONNECTOR_TYPE = "chroma" - - -@dataclass -class ChromaAccessConfig(AccessConfig): - settings: Optional[Settings] = None - headers: Optional[Dict[str, str]] = None - - -@dataclass -class ChromaConnectionConfig(ConnectionConfig): - collection_name: str - access_config: ChromaAccessConfig = enhanced_field(sensitive=True) - path: Optional[str] = None - tenant: str = "default_tenant" - database: str = "default_database" - host: Optional[str] = None - port: Optional[int] = None - ssl: bool = False - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class ChromaUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class ChromaUploadStager(UploadStager): - upload_stager_config: ChromaUploadStagerConfig = field( - default_factory=lambda: ChromaUploadStagerConfig() - ) - - @staticmethod - def parse_date_string(date_string: str) -> date: - try: - timestamp = 
float(date_string) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_string} string not a timestamp: {e}") - return parser.parse(date_string) - - @staticmethod - def conform_dict(data: dict) -> dict: - """ - Prepares dictionary in the format that Chroma requires - """ - element_id = data.get("element_id", str(uuid.uuid4())) - return { - "id": element_id, - "embedding": data.pop("embeddings", None), - "document": data.pop("text", None), - "metadata": flatten_dict(data, separator="-", flatten_lists=True, remove_none=True), - } - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - conformed_elements = [self.conform_dict(data=element) for element in elements_contents] - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class ChromaUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class ChromaUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: ChromaUploaderConfig - connection_config: ChromaConnectionConfig - client: Optional[ClientAPI] = field(init=False) - - def __post_init__(self): - self.client = self.create_client() - - @requires_dependencies(["chromadb"], extras="chroma") - def create_client(self) -> ClientAPI: - import chromadb - - if self.connection_config.path: - return chromadb.PersistentClient( - path=self.connection_config.path, - settings=self.connection_config.access_config.settings, - tenant=self.connection_config.tenant, - database=self.connection_config.database, - ) - - elif self.connection_config.host and self.connection_config.port: - return chromadb.HttpClient( - host=self.connection_config.host, - port=self.connection_config.port, - ssl=self.connection_config.ssl, - headers=self.connection_config.access_config.headers, - settings=self.connection_config.access_config.settings, - tenant=self.connection_config.tenant, - database=self.connection_config.database, - ) - else: - raise ValueError("Chroma connector requires either path or host and port to be set.") - - @DestinationConnectionError.wrap - def upsert_batch(self, collection, batch): - - try: - # Chroma wants lists even if there is only one element - # Upserting to prevent duplicates - collection.upsert( - ids=batch["ids"], - documents=batch["documents"], - embeddings=batch["embeddings"], - metadatas=batch["metadatas"], - ) - except Exception as e: - raise ValueError(f"chroma error: {e}") from e - - @staticmethod - def prepare_chroma_list(chunk: tuple[dict[str, Any]]) -> dict[str, list[Any]]: - """Helper function to break a tuple of dicts into list of parallel lists for ChromaDb. 
- ({'id':1}, {'id':2}, {'id':3}) -> {'ids':[1,2,3]}""" - chroma_dict = {} - chroma_dict["ids"] = [x.get("id") for x in chunk] - chroma_dict["documents"] = [x.get("document") for x in chunk] - chroma_dict["embeddings"] = [x.get("embedding") for x in chunk] - chroma_dict["metadatas"] = [x.get("metadata") for x in chunk] - # Make sure all lists are of the same length - assert ( - len(chroma_dict["ids"]) - == len(chroma_dict["documents"]) - == len(chroma_dict["embeddings"]) - == len(chroma_dict["metadatas"]) - ) - return chroma_dict - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"collection {self.connection_config.collection_name} " - f"at {self.connection_config.host}", - ) - - collection = self.client.get_or_create_collection( - name=self.connection_config.collection_name - ) - for chunk in batch_generator(elements_dict, self.upload_config.batch_size): - self.upsert_batch(collection, self.prepare_chroma_list(chunk)) - - -chroma_destination_entry = DestinationRegistryEntry( - connection_config=ChromaConnectionConfig, - uploader=ChromaUploader, - uploader_config=ChromaUploaderConfig, - upload_stager=ChromaUploadStager, - upload_stager_config=ChromaUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/databricks_volumes.py b/unstructured/ingest/v2/processes/connectors/databricks_volumes.py deleted file mode 100644 index e875535c2..000000000 --- a/unstructured/ingest/v2/processes/connectors/databricks_volumes.py +++ /dev/null @@ -1,96 +0,0 @@ -import os -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - UploadContent, - Uploader, - UploaderConfig, -) -from unstructured.ingest.v2.processes.connector_registry import DestinationRegistryEntry -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from databricks.sdk import WorkspaceClient - -CONNECTOR_TYPE = "databricks_volumes" - - -@dataclass -class DatabricksVolumesAccessConfig(AccessConfig): - account_id: Optional[str] = None - username: Optional[str] = None - password: Optional[str] = None - client_id: Optional[str] = None - client_secret: Optional[str] = None - token: Optional[str] = None - profile: Optional[str] = None - azure_workspace_resource_id: Optional[str] = None - azure_client_secret: Optional[str] = None - azure_client_id: Optional[str] = None - azure_tenant_id: Optional[str] = None - azure_environment: Optional[str] = None - auth_type: Optional[str] = None - cluster_id: Optional[str] = None - google_credentials: Optional[str] = None - google_service_account: Optional[str] = None - - -@dataclass -class DatabricksVolumesConnectionConfig(ConnectionConfig): - access_config: DatabricksVolumesAccessConfig = enhanced_field( - default_factory=DatabricksVolumesAccessConfig, sensitive=True - ) - host: Optional[str] = None - - -@dataclass -class DatabricksVolumesUploaderConfig(UploaderConfig): - volume: str - catalog: str - volume_path: Optional[str] = None - overwrite: bool = False - schema: str = "default" - - @property - def path(self) -> str: - path = f"/Volumes/{self.catalog}/{self.schema}/{self.volume}" - if self.volume_path: - path = 
f"{path}/{self.volume_path}" - return path - - -@dataclass -class DatabricksVolumesUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: DatabricksVolumesUploaderConfig - connection_config: DatabricksVolumesConnectionConfig - client: Optional["WorkspaceClient"] = field(init=False, default=None) - - @requires_dependencies(dependencies=["databricks.sdk"], extras="databricks-volumes") - def __post_init__(self) -> "WorkspaceClient": - from databricks.sdk import WorkspaceClient - - self.client = WorkspaceClient( - host=self.connection_config.host, **self.connection_config.access_config.to_dict() - ) - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - with open(content.path, "rb") as elements_file: - output_path = os.path.join(self.upload_config.path, content.path.name) - self.client.files.upload( - file_path=output_path, - contents=elements_file, - overwrite=self.upload_config.overwrite, - ) - - -databricks_volumes_destination_entry = DestinationRegistryEntry( - connection_config=DatabricksVolumesConnectionConfig, - uploader=DatabricksVolumesUploader, - uploader_config=DatabricksVolumesUploaderConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/elasticsearch.py b/unstructured/ingest/v2/processes/connectors/elasticsearch.py deleted file mode 100644 index 4a45bae1b..000000000 --- a/unstructured/ingest/v2/processes/connectors/elasticsearch.py +++ /dev/null @@ -1,401 +0,0 @@ -import hashlib -import json -import sys -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.utils.data_prep import generator_batching_wbytes -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from elasticsearch import Elasticsearch as ElasticsearchClient - -CONNECTOR_TYPE = "elasticsearch" - - -@dataclass -class ElasticsearchAccessConfig(AccessConfig): - password: Optional[str] = None - api_key: Optional[str] = enhanced_field(default=None, overload_name="es_api_key") - bearer_auth: Optional[str] = None - ssl_assert_fingerprint: Optional[str] = None - - -@dataclass -class ElasticsearchClientInput(EnhancedDataClassJsonMixin): - hosts: Optional[list[str]] = None - cloud_id: Optional[str] = None - ca_certs: Optional[str] = None - basic_auth: Optional[tuple[str, str]] = enhanced_field(sensitive=True, default=None) - api_key: Optional[str] = enhanced_field(sensitive=True, default=None) - - -@dataclass -class ElasticsearchConnectionConfig(ConnectionConfig): - hosts: Optional[list[str]] = None - username: Optional[str] = None - cloud_id: Optional[str] = None - api_key_id: Optional[str] = None - ca_certs: Optional[str] = None - 
access_config: ElasticsearchAccessConfig = enhanced_field(sensitive=True) - - def get_client_kwargs(self) -> dict: - # Update auth related fields to conform to what the SDK expects based on the - # supported methods: - # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/connecting.html - client_input = ElasticsearchClientInput() - if self.hosts: - client_input.hosts = self.hosts - if self.cloud_id: - client_input.cloud_id = self.cloud_id - if self.ca_certs: - client_input.ca_certs = self.ca_certs - if self.access_config.password and ( - self.cloud_id or self.ca_certs or self.access_config.ssl_assert_fingerprint - ): - client_input.basic_auth = ("elastic", self.access_config.password) - elif not self.cloud_id and self.username and self.access_config.password: - client_input.basic_auth = (self.username, self.access_config.password) - elif self.access_config.api_key and self.api_key_id: - client_input.api_key = (self.api_key_id, self.access_config.api_key) - elif self.access_config.api_key: - client_input.api_key = self.access_config.api_key - logger.debug( - f"Elasticsearch client inputs mapped to: {client_input.to_dict(redact_sensitive=True)}" - ) - client_kwargs = client_input.to_dict(redact_sensitive=False) - client_kwargs = {k: v for k, v in client_kwargs.items() if v is not None} - return client_kwargs - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def get_client(self) -> "ElasticsearchClient": - from elasticsearch import Elasticsearch as ElasticsearchClient - - client = ElasticsearchClient(**self.get_client_kwargs()) - self.check_connection(client=client) - return client - - def check_connection(self, client: "ElasticsearchClient"): - try: - client.perform_request("HEAD", "/", headers={"accept": "application/json"}) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - -@dataclass -class ElasticsearchIndexerConfig(IndexerConfig): - index_name: str - batch_size: int = 100 - - -@dataclass -class ElasticsearchIndexer(Indexer): - connection_config: ElasticsearchConnectionConfig - index_config: ElasticsearchIndexerConfig - client: "ElasticsearchClient" = field(init=False) - connector_type: str = CONNECTOR_TYPE - - def __post_init__(self): - self.client = self.connection_config.get_client() - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def load_scan(self): - from elasticsearch.helpers import scan - - return scan - - def _get_doc_ids(self) -> set[str]: - """Fetches all document ids in an index""" - scan = self.load_scan() - - scan_query: dict = {"stored_fields": [], "query": {"match_all": {}}} - hits = scan( - self.client, - query=scan_query, - scroll="1m", - index=self.index_config.index_name, - ) - - return {hit["_id"] for hit in hits} - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - all_ids = self._get_doc_ids() - ids = list(all_ids) - id_batches: list[frozenset[str]] = [ - frozenset( - ids[ - i - * self.index_config.batch_size : (i + 1) # noqa - * self.index_config.batch_size - ] - ) - for i in range( - (len(ids) + self.index_config.batch_size - 1) // self.index_config.batch_size - ) - ] - for batch in id_batches: - # Make sure the hash is always a positive number to create the identifier - identified = str(hash(batch) + sys.maxsize + 1) - yield FileData( - identifier=identified, - connector_type=CONNECTOR_TYPE, - metadata=DataSourceMetadata( - 
url=f"{self.connection_config.hosts[0]}/{self.index_config.index_name}", - date_processed=str(time()), - ), - additional_metadata={ - "ids": list(batch), - "index_name": self.index_config.index_name, - }, - ) - - -@dataclass -class ElasticsearchDownloaderConfig(DownloaderConfig): - fields: list[str] = field(default_factory=list) - - -@dataclass -class ElasticsearchDownloader(Downloader): - connection_config: ElasticsearchConnectionConfig - download_config: ElasticsearchDownloaderConfig - connector_type: str = CONNECTOR_TYPE - - def is_async(self) -> bool: - return True - - def get_identifier(self, index_name: str, record_id: str) -> str: - f = f"{index_name}-{record_id}" - if self.download_config.fields: - f = "{}-{}".format( - f, - hashlib.sha256(",".join(self.download_config.fields).encode()).hexdigest()[:8], - ) - return f - - def map_es_results(self, es_results: dict) -> str: - doc_body = es_results["_source"] - flattened_dict = flatten_dict(dictionary=doc_body) - str_values = [str(value) for value in flattened_dict.values()] - concatenated_values = "\n".join(str_values) - return concatenated_values - - def generate_download_response( - self, result: dict, index_name: str, file_data: FileData - ) -> DownloadResponse: - record_id = result["_id"] - filename_id = self.get_identifier(index_name=index_name, record_id=record_id) - filename = f"{filename_id}.txt" - download_path = self.download_dir / Path(filename) - logger.debug( - f"Downloading results from index {index_name} and id {record_id} to {download_path}" - ) - download_path.parent.mkdir(parents=True, exist_ok=True) - try: - with open(download_path, "w", encoding="utf8") as f: - f.write(self.map_es_results(es_results=result)) - except Exception as e: - logger.error( - f"failed to download from index {index_name} " - f"and id {record_id} to {download_path}: {e}", - exc_info=True, - ) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - return DownloadResponse( - file_data=FileData( - identifier=filename_id, - connector_type=CONNECTOR_TYPE, - metadata=DataSourceMetadata( - version=str(result["_version"]) if "_version" in result else None, - date_processed=str(time()), - record_locator={ - "hosts": self.connection_config.hosts, - "index_name": index_name, - "document_id": record_id, - }, - ), - ), - path=download_path, - ) - - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - raise NotImplementedError() - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def load_async(self): - from elasticsearch import AsyncElasticsearch - from elasticsearch.helpers import async_scan - - return AsyncElasticsearch, async_scan - - async def run_async(self, file_data: FileData, **kwargs: Any) -> download_responses: - AsyncClient, async_scan = self.load_async() - - index_name: str = file_data.additional_metadata["index_name"] - ids: list[str] = file_data.additional_metadata["ids"] - - scan_query = { - "_source": self.download_config.fields, - "version": True, - "query": {"ids": {"values": ids}}, - } - - download_responses = [] - async with AsyncClient(**self.connection_config.get_client_kwargs()) as client: - async for result in async_scan( - client, - query=scan_query, - scroll="1m", - index=index_name, - ): - download_responses.append( - self.generate_download_response( - result=result, index_name=index_name, file_data=file_data - ) - ) - return download_responses - - -@dataclass -class ElasticsearchUploadStagerConfig(UploadStagerConfig): - index_name: str - - -@dataclass 
-class ElasticsearchUploadStager(UploadStager): - upload_stager_config: ElasticsearchUploadStagerConfig - - def conform_dict(self, data: dict) -> dict: - resp = { - "_index": self.upload_stager_config.index_name, - "_id": str(uuid.uuid4()), - "_source": { - "element_id": data.pop("element_id", None), - "embeddings": data.pop("embeddings", None), - "text": data.pop("text", None), - "type": data.pop("type", None), - }, - } - if "metadata" in data and isinstance(data["metadata"], dict): - resp["_source"]["metadata"] = flatten_dict(data["metadata"], separator="-") - return resp - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - conformed_elements = [self.conform_dict(data=element) for element in elements_contents] - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class ElasticsearchUploaderConfig(UploaderConfig): - index_name: str - batch_size_bytes: int = 15_000_000 - num_threads: int = 4 - - -@dataclass -class ElasticsearchUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: ElasticsearchUploaderConfig - connection_config: ElasticsearchConnectionConfig - - @requires_dependencies(["elasticsearch"], extras="elasticsearch") - def load_parallel_bulk(self): - from elasticsearch.helpers import parallel_bulk - - return parallel_bulk - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - parallel_bulk = self.load_parallel_bulk() - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - upload_destination = self.connection_config.hosts or self.connection_config.cloud_id - logger.info( - f"writing {len(elements_dict)} elements via document batches to destination " - f"index named {self.upload_config.index_name} at {upload_destination} with " - f"batch size (in bytes) {self.upload_config.batch_size_bytes} with " - f"{self.upload_config.num_threads} (number of) threads" - ) - - client = self.connection_config.get_client() - if not client.indices.exists(index=self.upload_config.index_name): - logger.warning( - f"{(self.__class__.__name__).replace('Uploader', '')} index does not exist: " - f"{self.upload_config.index_name}. " - f"This may cause issues when uploading." 
- ) - for batch in generator_batching_wbytes( - elements_dict, batch_size_limit_bytes=self.upload_config.batch_size_bytes - ): - for success, info in parallel_bulk( - client=client, - actions=batch, - thread_count=self.upload_config.num_threads, - ): - if not success: - logger.error( - "upload failed for a batch in " - f"{(self.__class__.__name__).replace('Uploader', '')} " - "destination connector:", - info, - ) - - -elasticsearch_source_entry = SourceRegistryEntry( - connection_config=ElasticsearchConnectionConfig, - indexer=ElasticsearchIndexer, - indexer_config=ElasticsearchIndexerConfig, - downloader=ElasticsearchDownloader, - downloader_config=ElasticsearchDownloaderConfig, -) - -elasticsearch_destination_entry = DestinationRegistryEntry( - connection_config=ElasticsearchConnectionConfig, - upload_stager_config=ElasticsearchUploadStagerConfig, - upload_stager=ElasticsearchUploadStager, - uploader_config=ElasticsearchUploaderConfig, - uploader=ElasticsearchUploader, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/__init__.py b/unstructured/ingest/v2/processes/connectors/fsspec/__init__.py deleted file mode 100644 index eacc0df96..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -from unstructured.ingest.v2.processes.connector_registry import ( - add_destination_entry, - add_source_entry, -) - -from .azure import CONNECTOR_TYPE as AZURE_CONNECTOR_TYPE -from .azure import azure_destination_entry, azure_source_entry -from .box import CONNECTOR_TYPE as BOX_CONNECTOR_TYPE -from .box import box_destination_entry, box_source_entry -from .dropbox import CONNECTOR_TYPE as DROPBOX_CONNECTOR_TYPE -from .dropbox import dropbox_destination_entry, dropbox_source_entry -from .gcs import CONNECTOR_TYPE as GCS_CONNECTOR_TYPE -from .gcs import gcs_destination_entry, gcs_source_entry -from .s3 import CONNECTOR_TYPE as S3_CONNECTOR_TYPE -from .s3 import s3_destination_entry, s3_source_entry -from .sftp import CONNECTOR_TYPE as SFTP_CONNECTOR_TYPE -from .sftp import sftp_destination_entry, sftp_source_entry - -add_source_entry(source_type=AZURE_CONNECTOR_TYPE, entry=azure_source_entry) -add_destination_entry(destination_type=AZURE_CONNECTOR_TYPE, entry=azure_destination_entry) - -add_source_entry(source_type=BOX_CONNECTOR_TYPE, entry=box_source_entry) -add_destination_entry(destination_type=BOX_CONNECTOR_TYPE, entry=box_destination_entry) - -add_source_entry(source_type=DROPBOX_CONNECTOR_TYPE, entry=dropbox_source_entry) -add_destination_entry(destination_type=DROPBOX_CONNECTOR_TYPE, entry=dropbox_destination_entry) - -add_source_entry(source_type=GCS_CONNECTOR_TYPE, entry=gcs_source_entry) -add_destination_entry(destination_type=GCS_CONNECTOR_TYPE, entry=gcs_destination_entry) - -add_source_entry(source_type=S3_CONNECTOR_TYPE, entry=s3_source_entry) -add_destination_entry(destination_type=S3_CONNECTOR_TYPE, entry=s3_destination_entry) - -add_source_entry(source_type=SFTP_CONNECTOR_TYPE, entry=sftp_source_entry) -add_destination_entry(destination_type=SFTP_CONNECTOR_TYPE, entry=sftp_destination_entry) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/azure.py b/unstructured/ingest/v2/processes/connectors/fsspec/azure.py deleted file mode 100644 index 8dd756600..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/azure.py +++ /dev/null @@ -1,144 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path 
-from typing import Any, Generator, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.fsspec.utils import json_serial, sterilize_dict -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "azure" - - -def azure_json_serial(obj): - from azure.storage.blob._models import ContentSettings - - if isinstance(obj, ContentSettings): - return dict(obj) - if isinstance(obj, bytearray): - return str(obj) - return json_serial(obj) - - -@dataclass -class AzureIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class AzureAccessConfig(FsspecAccessConfig): - account_name: Optional[str] = None - account_key: Optional[str] = None - connection_string: Optional[str] = None - sas_token: Optional[str] = None - - def __post_init__(self): - if self.connection_string is None and self.account_name is None: - raise ValueError("either connection_string or account_name must be set") - - -@dataclass -class AzureConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["az"]) - access_config: AzureAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: AzureAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_access_config(self) -> dict[str, Any]: - # Avoid injecting None by filtering out k,v pairs where the value is None - access_configs: dict[str, Any] = { - k: v for k, v in self.access_config.to_dict().items() if v - } - return access_configs - - -@dataclass -class AzureIndexer(FsspecIndexer): - connection_config: AzureConnectionConfig - index_config: AzureIndexerConfig - connector_type: str = CONNECTOR_TYPE - - def sterilize_info(self, path) -> dict: - info = self.fs.info(path=path) - return sterilize_dict(data=info, default=azure_json_serial) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class AzureDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class AzureDownloader(FsspecDownloader): - protocol: str = "az" - connection_config: AzureConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[AzureDownloaderConfig] = field(default_factory=AzureDownloaderConfig) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class AzureUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class AzureUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: AzureConnectionConfig - upload_config: AzureUploaderConfig = field(default=None) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def 
__post_init__(self): - super().__post_init__() - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["adlfs", "fsspec"], extras="azure") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -azure_source_entry = SourceRegistryEntry( - indexer=AzureIndexer, - indexer_config=AzureIndexerConfig, - downloader=AzureDownloader, - downloader_config=AzureDownloaderConfig, - connection_config=AzureConnectionConfig, -) - -azure_destination_entry = DestinationRegistryEntry( - uploader=AzureUploader, - uploader_config=AzureUploaderConfig, - connection_config=AzureConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/box.py b/unstructured/ingest/v2/processes/connectors/fsspec/box.py deleted file mode 100644 index 77d60c79e..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/box.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "box" - - -@dataclass -class BoxIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class BoxAccessConfig(FsspecAccessConfig): - box_app_config: Optional[str] = None - - -@dataclass -class BoxConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["box"]) - access_config: BoxAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: BoxAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_access_config(self) -> dict[str, Any]: - # Return access_kwargs with oauth. The oauth object can not be stored directly in the config - # because it is not serializable. 
- from boxsdk import JWTAuth - - access_kwargs_with_oauth: dict[str, Any] = { - "oauth": JWTAuth.from_settings_file( - self.access_config.box_app_config, - ), - } - access_config: dict[str, Any] = self.access_config.to_dict() - access_config.pop("box_app_config", None) - access_kwargs_with_oauth.update(access_config) - - return access_kwargs_with_oauth - - -@dataclass -class BoxIndexer(FsspecIndexer): - connection_config: BoxConnectionConfig - index_config: BoxIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["boxfs"], extras="box") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class BoxDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class BoxDownloader(FsspecDownloader): - protocol: str = "box" - connection_config: BoxConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[BoxDownloaderConfig] = field(default_factory=BoxDownloaderConfig) - - @requires_dependencies(["boxfs"], extras="box") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["boxfs"], extras="box") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class BoxUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class BoxUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: BoxConnectionConfig - upload_config: BoxUploaderConfig = field(default=None) - - @requires_dependencies(["boxfs"], extras="box") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["boxfs"], extras="box") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["boxfs"], extras="box") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -box_source_entry = SourceRegistryEntry( - indexer=BoxIndexer, - indexer_config=BoxIndexerConfig, - downloader=BoxDownloader, - downloader_config=BoxDownloaderConfig, - connection_config=BoxConnectionConfig, -) - -box_destination_entry = DestinationRegistryEntry( - uploader=BoxUploader, - uploader_config=BoxUploaderConfig, - connection_config=BoxConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py b/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py deleted file mode 100644 index 96dc3ba71..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/dropbox.py +++ /dev/null @@ -1,130 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.ingest.v2.processes.connectors.fsspec.utils import 
sterilize_dict -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "dropbox" - - -@dataclass -class DropboxIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class DropboxAccessConfig(FsspecAccessConfig): - token: Optional[str] = None - - -@dataclass -class DropboxConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["dropbox"]) - access_config: DropboxAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: DropboxAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class DropboxIndexer(FsspecIndexer): - connection_config: DropboxConnectionConfig - index_config: DropboxIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def __post_init__(self): - # dropbox expects the path to start with a / - if not self.index_config.path_without_protocol.startswith("/"): - self.index_config.path_without_protocol = "/" + self.index_config.path_without_protocol - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - def sterilize_info(self, path) -> dict: - # the fs.info method defined in the dropboxdrivefs library expects a "url" - # kwarg rather than "path"; though both refer to the same thing - info = self.fs.info(url=path) - return sterilize_dict(data=info) - - -@dataclass -class DropboxDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class DropboxDownloader(FsspecDownloader): - protocol: str = "dropbox" - connection_config: DropboxConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[DropboxDownloaderConfig] = field( - default_factory=DropboxDownloaderConfig - ) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class DropboxUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class DropboxUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: DropboxConnectionConfig - upload_config: DropboxUploaderConfig = field(default=None) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -dropbox_source_entry = SourceRegistryEntry( - indexer=DropboxIndexer, - indexer_config=DropboxIndexerConfig, - downloader=DropboxDownloader, - downloader_config=DropboxDownloaderConfig, - connection_config=DropboxConnectionConfig, -) - -dropbox_destination_entry = DestinationRegistryEntry( - uploader=DropboxUploader, - uploader_config=DropboxUploaderConfig, - connection_config=DropboxConnectionConfig, -) diff --git 
a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py b/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py deleted file mode 100644 index 2adfa99b0..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/fsspec.py +++ /dev/null @@ -1,344 +0,0 @@ -from __future__ import annotations - -import contextlib -import fnmatch -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional, TypeVar - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - UploadContent, - Uploader, - UploaderConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connectors.fsspec.utils import sterilize_dict - -if TYPE_CHECKING: - from fsspec import AbstractFileSystem - -CONNECTOR_TYPE = "fsspec" - - -class Base(object): - def __post_init__(self): - pass - - -@dataclass -class FileConfig(Base): - remote_url: str - protocol: str = field(init=False) - path_without_protocol: str = field(init=False) - supported_protocols: list[str] = field( - default_factory=lambda: [ - "s3", - "s3a", - "abfs", - "az", - "gs", - "gcs", - "box", - "dropbox", - "sftp", - ] - ) - - def __post_init__(self): - super().__post_init__() - self.protocol, self.path_without_protocol = self.remote_url.split("://") - if self.protocol not in self.supported_protocols: - raise ValueError( - "Protocol {} not supported yet, only {} are supported.".format( - self.protocol, ", ".join(self.supported_protocols) - ), - ) - - -@dataclass -class FsspecIndexerConfig(FileConfig, IndexerConfig): - recursive: bool = False - file_glob: Optional[list[str]] = None - - -@dataclass -class FsspecAccessConfig(AccessConfig): - pass - - -FsspecAccessConfigT = TypeVar("FsspecAccessConfigT", bound=FsspecAccessConfig) - - -@dataclass -class FsspecConnectionConfig(ConnectionConfig): - access_config: FsspecAccessConfigT = enhanced_field(sensitive=True, default=None) - connector_type: str = CONNECTOR_TYPE - - -FsspecIndexerConfigT = TypeVar("FsspecIndexerConfigT", bound=FsspecIndexerConfig) -FsspecConnectionConfigT = TypeVar("FsspecConnectionConfigT", bound=FsspecConnectionConfig) - - -@dataclass -class FsspecIndexer(Indexer): - connection_config: FsspecConnectionConfigT - index_config: FsspecIndexerConfigT - connector_type: str = CONNECTOR_TYPE - - @property - def fs(self) -> "AbstractFileSystem": - from fsspec import get_filesystem_class - - return get_filesystem_class(self.index_config.protocol)( - **self.connection_config.get_access_config(), - ) - - def does_path_match_glob(self, path: str) -> bool: - if self.index_config.file_glob is None: - return True - patterns = self.index_config.file_glob - for pattern in patterns: - if fnmatch.filter([path], pattern): - return True - logger.debug(f"The file {path!r} is discarded as it does not match any given glob.") - return False - - def check_connection(self): - from fsspec import get_filesystem_class - - try: - fs = get_filesystem_class(self.index_config.protocol)( - **self.connection_config.get_access_config(), - ) - fs.ls(path=self.index_config.path_without_protocol, 
detail=False) - except Exception as e: - logger.error(f"failed to validate connection: {e}", exc_info=True) - raise SourceConnectionError(f"failed to validate connection: {e}") - - def list_files(self) -> list[str]: - if not self.index_config.recursive: - # fs.ls does not walk directories - # directories that are listed in cloud storage can cause problems - # because they are seen as 0 byte files - found = self.fs.ls(self.index_config.path_without_protocol, detail=True) - if isinstance(found, list): - return [ - x.get("name") for x in found if x.get("size") > 0 and x.get("type") == "file" - ] - else: - raise TypeError(f"unhandled response type from ls: {type(found)}") - else: - # fs.find will recursively walk directories - # "size" is a common key for all the cloud protocols with fs - found = self.fs.find( - self.index_config.path_without_protocol, - detail=True, - ) - if isinstance(found, dict): - return [ - k for k, v in found.items() if v.get("size") > 0 and v.get("type") == "file" - ] - else: - raise TypeError(f"unhandled response type from find: {type(found)}") - - def get_metadata(self, path: str) -> DataSourceMetadata: - date_created = None - date_modified = None - - try: - created: Optional[Any] = self.fs.created(path) - if created: - if isinstance(created, datetime): - date_created = str(created.timestamp()) - else: - date_created = str(created) - except NotImplementedError: - pass - - try: - modified: Optional[Any] = self.fs.modified(path) - if modified: - if isinstance(modified, datetime): - date_modified = str(modified.timestamp()) - else: - date_modified = str(modified) - except NotImplementedError: - pass - - version = self.fs.checksum(path) - metadata: dict[str, str] = {} - with contextlib.suppress(AttributeError): - metadata = self.fs.metadata(path) - record_locator = { - "protocol": self.index_config.protocol, - "remote_file_path": self.index_config.remote_url, - } - file_stat = self.fs.stat(path=path) - if file_id := file_stat.get("id"): - record_locator["file_id"] = file_id - if metadata: - record_locator["metadata"] = metadata - return DataSourceMetadata( - date_created=date_created, - date_modified=date_modified, - date_processed=str(time()), - version=str(version), - url=f"{self.index_config.protocol}://{path}", - record_locator=record_locator, - ) - - def sterilize_info(self, path) -> dict: - info = self.fs.info(path=path) - return sterilize_dict(data=info) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - raw_files = self.list_files() - files = [f for f in raw_files if self.does_path_match_glob(f)] - for file in files: - # Note: we remove any remaining leading slashes (Box introduces these) - # to get a valid relative path - rel_path = file.replace(self.index_config.path_without_protocol, "").lstrip("/") - yield FileData( - identifier=file, - connector_type=self.connector_type, - source_identifiers=SourceIdentifiers( - filename=Path(file).name, - rel_path=rel_path or None, - fullpath=file, - ), - metadata=self.get_metadata(path=file), - additional_metadata=self.sterilize_info(path=file), - ) - - -@dataclass -class FsspecDownloaderConfig(DownloaderConfig): - pass - - -FsspecDownloaderConfigT = TypeVar("FsspecDownloaderConfigT", bound=FsspecDownloaderConfig) - - -@dataclass -class FsspecDownloader(Downloader): - protocol: str - connection_config: FsspecConnectionConfigT - connector_type: str = CONNECTOR_TYPE - download_config: Optional[FsspecDownloaderConfigT] = field( - default_factory=lambda: FsspecDownloaderConfig() - ) - - def is_async(self) 
-> bool: - return self.fs.async_impl - - @property - def fs(self) -> "AbstractFileSystem": - from fsspec import get_filesystem_class - - return get_filesystem_class(self.protocol)( - **self.connection_config.get_access_config(), - ) - - def get_download_path(self, file_data: FileData) -> Path: - return ( - self.download_dir / Path(file_data.source_identifiers.relative_path) - if self.download_config - else Path(file_data.source_identifiers.rel_path) - ) - - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - try: - self.fs.get(rpath=file_data.identifier, lpath=download_path.as_posix()) - except Exception as e: - logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - return self.generate_download_response(file_data=file_data, download_path=download_path) - - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - try: - await self.fs.get(rpath=file_data.identifier, lpath=download_path.as_posix()) - except Exception as e: - logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - return self.generate_download_response(file_data=file_data, download_path=download_path) - - -@dataclass -class FsspecUploaderConfig(FileConfig, UploaderConfig): - overwrite: bool = False - - -FsspecUploaderConfigT = TypeVar("FsspecUploaderConfigT", bound=FsspecUploaderConfig) - - -@dataclass -class FsspecUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: FsspecUploaderConfigT = field(default=None) - - @property - def fs(self) -> "AbstractFileSystem": - from fsspec import get_filesystem_class - - fs_kwargs = self.connection_config.get_access_config() if self.connection_config else {} - return get_filesystem_class(self.upload_config.protocol)( - **fs_kwargs, - ) - - def __post_init__(self): - # TODO once python3.9 no longer supported and kw_only is allowed in dataclasses, remove: - if not self.upload_config: - raise TypeError( - f"{self.__class__.__name__}.__init__() " - f"missing 1 required positional argument: 'upload_config'" - ) - - def get_upload_path(self, file_data: FileData) -> Path: - upload_path = ( - Path(self.upload_config.path_without_protocol) - / file_data.source_identifiers.relative_path - ) - updated_upload_path = upload_path.parent / f"{upload_path.name}.json" - return updated_upload_path - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - self._run(path=content.path, file_data=content.file_data) - - def _run(self, path: Path, file_data: FileData) -> None: - path_str = str(path.resolve()) - upload_path = self.get_upload_path(file_data=file_data) - if self.fs.exists(path=str(upload_path)) and not self.upload_config.overwrite: - logger.debug(f"Skipping upload of {path} to {upload_path}, file already exists") - return - logger.debug(f"Writing local file {path_str} to {upload_path}") - self.fs.upload(lpath=path_str, rpath=str(upload_path)) - - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - upload_path = self.get_upload_path(file_data=file_data) - path_str = str(path.resolve())
- # Odd that fsspec doesn't run exists() as async even when the client supports async - already_exists = self.fs.exists(path=str(upload_path)) - if already_exists and not self.upload_config.overwrite: - logger.debug(f"Skipping upload of {path} to {upload_path}, file already exists") - return - logger.debug(f"Writing local file {path_str} to {upload_path}") - self.fs.upload(lpath=path_str, rpath=str(upload_path)) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py b/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py deleted file mode 100644 index 2c51f1c12..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/gcs.py +++ /dev/null @@ -1,141 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional, Union - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "gcs" - - -@dataclass -class GcsIndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class GcsAccessConfig(FsspecAccessConfig): - service_account_key: Optional[str] = None - token: Union[str, dict, None] = field(init=False, default=None) - - def __post_init__(self): - ALLOWED_AUTH_VALUES = "google_default", "cache", "anon", "browser", "cloud" - - # Case: null value - if not self.service_account_key: - return - - # Case: one of auth constants - if self.service_account_key in ALLOWED_AUTH_VALUES: - self.token = self.service_account_key - return - - # Case: token as json - if isinstance(json_to_dict(self.service_account_key), dict): - self.token = json_to_dict(self.service_account_key) - return - - # Case: path to token - if Path(self.service_account_key).is_file(): - self.token = self.service_account_key - return - - raise ValueError("Invalid auth token value") - - -@dataclass -class GcsConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["gs", "gcs"]) - access_config: GcsAccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: GcsAccessConfig() - ) - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class GcsIndexer(FsspecIndexer): - connection_config: GcsConnectionConfig - index_config: GcsIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class GcsDownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class GcsDownloader(FsspecDownloader): - protocol: str = "gcs" - connection_config: GcsConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[GcsDownloaderConfig] = field(default_factory=GcsDownloaderConfig) - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - 
@requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class GcsUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class GcsUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: GcsConnectionConfig - upload_config: GcsUploaderConfig = field(default=None) - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["gcsfs", "fsspec"], extras="gcs") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -gcs_source_entry = SourceRegistryEntry( - indexer=GcsIndexer, - indexer_config=GcsIndexerConfig, - downloader=GcsDownloader, - downloader_config=GcsDownloaderConfig, - connection_config=GcsConnectionConfig, -) - -gcs_destination_entry = DestinationRegistryEntry( - uploader=GcsUploader, - uploader_config=GcsUploaderConfig, - connection_config=GcsConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/s3.py b/unstructured/ingest/v2/processes/connectors/fsspec/s3.py deleted file mode 100644 index 7f48bdc81..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/s3.py +++ /dev/null @@ -1,163 +0,0 @@ -import contextlib -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from time import time -from typing import Any, Generator, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "s3" - - -@dataclass -class S3IndexerConfig(FsspecIndexerConfig): - pass - - -@dataclass -class S3AccessConfig(FsspecAccessConfig): - key: Optional[str] = None - secret: Optional[str] = None - token: Optional[str] = None - - -@dataclass -class S3ConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["s3", "s3a"]) - access_config: S3AccessConfig = enhanced_field( - sensitive=True, default_factory=lambda: S3AccessConfig() - ) - endpoint_url: Optional[str] = None - anonymous: bool = False - connector_type: str = CONNECTOR_TYPE - - def get_access_config(self) -> dict[str, Any]: - access_configs: dict[str, Any] = {"anon": self.anonymous} - if self.endpoint_url: - access_configs["endpoint_url"] = self.endpoint_url - - # Avoid injecting None by filtering out k,v pairs where the value is None - access_configs.update({k: v for k, v in self.access_config.to_dict().items() if v}) - return access_configs - - -@dataclass -class S3Indexer(FsspecIndexer): - connection_config: S3ConnectionConfig - index_config: 
S3IndexerConfig - connector_type: str = CONNECTOR_TYPE - - def get_metadata(self, path: str) -> DataSourceMetadata: - date_created = None - date_modified = None - try: - modified: Optional[datetime] = self.fs.modified(path) - if modified: - date_created = str(modified.timestamp()) - date_modified = str(modified.timestamp()) - except NotImplementedError: - pass - - version = None - info: dict[str, Any] = self.fs.info(path) - if etag := info.get("ETag"): - version = str(etag).rstrip('"').lstrip('"') - metadata: dict[str, str] = {} - with contextlib.suppress(AttributeError): - metadata = self.fs.metadata(path) - record_locator = { - "protocol": self.index_config.protocol, - "remote_file_path": self.index_config.remote_url, - } - if metadata: - record_locator["metadata"] = metadata - return DataSourceMetadata( - date_created=date_created, - date_modified=date_modified, - date_processed=str(time()), - version=version, - url=f"{self.index_config.protocol}://{path}", - record_locator=record_locator, - ) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - return super().run(**kwargs) - - -@dataclass -class S3DownloaderConfig(FsspecDownloaderConfig): - pass - - -@dataclass -class S3Downloader(FsspecDownloader): - protocol: str = "s3" - connection_config: S3ConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[S3DownloaderConfig] = field(default_factory=S3DownloaderConfig) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class S3UploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class S3Uploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: S3ConnectionConfig - upload_config: S3UploaderConfig = field(default=None) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["s3fs", "fsspec"], extras="s3") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -s3_source_entry = SourceRegistryEntry( - indexer=S3Indexer, - indexer_config=S3IndexerConfig, - downloader=S3Downloader, - downloader_config=S3DownloaderConfig, - connection_config=S3ConnectionConfig, -) - -s3_destination_entry = DestinationRegistryEntry( - uploader=S3Uploader, - uploader_config=S3UploaderConfig, - connection_config=S3ConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py b/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py deleted file mode 100644 index d73a22195..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/sftp.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import annotations - -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Generator, Optional -from urllib.parse import urlparse - -from unstructured.ingest.enhanced_dataclass import 
enhanced_field -from unstructured.ingest.v2.interfaces import DownloadResponse, FileData, UploadContent -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.fsspec.fsspec import ( - FsspecAccessConfig, - FsspecConnectionConfig, - FsspecDownloader, - FsspecDownloaderConfig, - FsspecIndexer, - FsspecIndexerConfig, - FsspecUploader, - FsspecUploaderConfig, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "sftp" - - -@dataclass -class SftpIndexerConfig(FsspecIndexerConfig): - def __post_init__(self): - super().__post_init__() - _, ext = os.path.splitext(self.remote_url) - parsed_url = urlparse(self.remote_url) - if ext: - self.path_without_protocol = Path(parsed_url.path).parent.as_posix().lstrip("/") - else: - self.path_without_protocol = parsed_url.path.lstrip("/") - - -@dataclass -class SftpAccessConfig(FsspecAccessConfig): - password: str - - -@dataclass -class SftpConnectionConfig(FsspecConnectionConfig): - supported_protocols: list[str] = field(default_factory=lambda: ["sftp"]) - access_config: SftpAccessConfig = enhanced_field(sensitive=True) - connector_type: str = CONNECTOR_TYPE - username: Optional[str] = None - host: Optional[str] = None - port: int = 22 - look_for_keys: bool = False - allow_agent: bool = False - - def get_access_config(self) -> dict[str, Any]: - access_config = { - "username": self.username, - "host": self.host, - "port": self.port, - "look_for_keys": self.look_for_keys, - "allow_agent": self.allow_agent, - "password": self.access_config.password, - } - return access_config - - -@dataclass -class SftpIndexer(FsspecIndexer): - connection_config: SftpConnectionConfig - index_config: SftpIndexerConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def __post_init__(self): - parsed_url = urlparse(self.index_config.remote_url) - self.connection_config.host = parsed_url.hostname or self.connection_config.host - self.connection_config.port = parsed_url.port or self.connection_config.port - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for file in super().run(**kwargs): - new_identifier = ( - f"sftp://" - f"{self.connection_config.host}:" - f"{self.connection_config.port}/" - f"{file.identifier}" - ) - file.identifier = new_identifier - yield file - - -@dataclass -class SftpDownloaderConfig(FsspecDownloaderConfig): - remote_url: Optional[str] = None - - def __post_init__(self): - # TODO once python3.9 no longer supported and kw_only is allowed in dataclasses, remove: - if not self.remote_url: - raise TypeError( - f"{self.__class__.__name__}.__init__() " - f"missing 1 required positional argument: 'remote_url'" - ) - - -@dataclass -class SftpDownloader(FsspecDownloader): - protocol: str = "sftp" - connection_config: SftpConnectionConfig - connector_type: str = CONNECTOR_TYPE - download_config: Optional[SftpDownloaderConfig] = field(default_factory=SftpDownloaderConfig) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def __post_init__(self): - parsed_url = urlparse(self.download_config.remote_url) - self.connection_config.host = parsed_url.hostname or self.connection_config.host - self.connection_config.port = parsed_url.port or self.connection_config.port - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def run(self, file_data: FileData, **kwargs: 
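# The Sftp indexer and downloader above both fold the URL's host and port back
# into the shared connection config before connecting; a stdlib-only sketch of
# that urlparse step (URL illustrative):
from urllib.parse import urlparse

remote_url = "sftp://example.com:2222/upload/data.txt"  # illustrative URL
parsed = urlparse(remote_url)
host = parsed.hostname or "configured-host"
port = parsed.port or 22
identifier = f"sftp://{host}:{port}{parsed.path}"
assert identifier == "sftp://example.com:2222/upload/data.txt"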
Any) -> DownloadResponse: - return super().run(file_data=file_data, **kwargs) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return await super().run_async(file_data=file_data, **kwargs) - - -@dataclass -class SftpUploaderConfig(FsspecUploaderConfig): - pass - - -@dataclass -class SftpUploader(FsspecUploader): - connector_type: str = CONNECTOR_TYPE - connection_config: SftpConnectionConfig - upload_config: SftpUploaderConfig = field(default=None) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def __post_init__(self): - super().__post_init__() - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - return super().run(contents=contents, **kwargs) - - @requires_dependencies(["paramiko", "fsspec"], extras="sftp") - async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None: - return await super().run_async(path=path, file_data=file_data, **kwargs) - - -sftp_source_entry = SourceRegistryEntry( - indexer=SftpIndexer, - indexer_config=SftpIndexerConfig, - downloader=SftpDownloader, - downloader_config=SftpDownloaderConfig, - connection_config=SftpConnectionConfig, -) - -sftp_destination_entry = DestinationRegistryEntry( - uploader=SftpUploader, - uploader_config=SftpUploaderConfig, - connection_config=SftpConnectionConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/fsspec/utils.py b/unstructured/ingest/v2/processes/connectors/fsspec/utils.py deleted file mode 100644 index e852e21dd..000000000 --- a/unstructured/ingest/v2/processes/connectors/fsspec/utils.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -from datetime import datetime -from pathlib import Path -from typing import Callable - - -def json_serial(obj): - if isinstance(obj, Path): - return obj.as_posix() - if isinstance(obj, datetime): - return obj.isoformat() - raise TypeError("Type %s not serializable" % type(obj)) - - -def sterilize_dict(data: dict, default: Callable = json_serial) -> dict: - data_s = json.dumps(data, default=default) - return json.loads(data_s) diff --git a/unstructured/ingest/v2/processes/connectors/google_drive.py b/unstructured/ingest/v2/processes/connectors/google_drive.py deleted file mode 100644 index 8d61671cf..000000000 --- a/unstructured/ingest/v2/processes/connectors/google_drive.py +++ /dev/null @@ -1,335 +0,0 @@ -import io -import os -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Generator, Optional, Union - -from dateutil import parser - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.file_utils.google_filetype import GOOGLE_DRIVE_EXPORT_TYPES -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.utils.string_and_date_utils import json_to_dict -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "google_drive" - -if TYPE_CHECKING: - from googleapiclient.discovery import Resource as GoogleAPIResource 
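# sterilize_dict above makes an arbitrary dict JSON-safe by round-tripping it
# through dumps/loads with a custom default; the trick in miniature:
import json
from datetime import datetime
from pathlib import Path


def json_serial(obj):
    if isinstance(obj, Path):
        return obj.as_posix()
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError(f"Type {type(obj)} not serializable")


clean = json.loads(
    json.dumps({"path": Path("/tmp/x"), "when": datetime(2024, 1, 1)}, default=json_serial)
)
# clean == {"path": "/tmp/x", "when": "2024-01-01T00:00:00"}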
- from googleapiclient.http import MediaIoBaseDownload - - -@dataclass -class GoogleDriveAccessConfig(AccessConfig): - service_account_key: Union[str, dict] - - -@dataclass -class GoogleDriveConnectionConfig(ConnectionConfig): - drive_id: str - access_config: GoogleDriveAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["googleapiclient"], extras="google-drive") - def get_files_service(self) -> "GoogleAPIResource": - from google.auth import default, exceptions - from google.oauth2 import service_account - from googleapiclient.discovery import build - from googleapiclient.errors import HttpError - - # Service account key can be a dict or a file path(str) - # But the dict may come in as a string - if isinstance(self.access_config.service_account_key, str): - key_path = json_to_dict(self.access_config.service_account_key) - elif isinstance(self.access_config.service_account_key, dict): - key_path = self.access_config.service_account_key - else: - raise TypeError( - f"access_config.service_account_key must be " - f"str or dict, got: {type(self.access_config.service_account_key)}" - ) - - try: - if isinstance(key_path, dict): - creds = service_account.Credentials.from_service_account_info(key_path) - elif isinstance(key_path, str): - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path - creds, _ = default() - else: - raise ValueError( - f"key path not recognized as a dictionary or a file path: " - f"[{type(key_path)}] {key_path}", - ) - service = build("drive", "v3", credentials=creds) - return service.files() - - except HttpError as exc: - raise ValueError(f"{exc.reason}") - except exceptions.DefaultCredentialsError: - raise ValueError("The provided API key is invalid.") - - -@dataclass -class GoogleDriveIndexerConfig(IndexerConfig): - extensions: Optional[list[str]] = None - recursive: bool = False - - def __post_init__(self): - # Strip leading period of extension - if self.extensions is not None: - self.extensions = [e[1:] if e.startswith(".") else e for e in self.extensions] - - -@dataclass -class GoogleDriveIndexer(Indexer): - connection_config: GoogleDriveConnectionConfig - index_config: GoogleDriveIndexerConfig - fields: list[str] = field( - default_factory=lambda: [ - "id", - "name", - "mimeType", - "fileExtension", - "md5Checksum", - "sha1Checksum", - "sha256Checksum", - "headRevisionId", - "permissions", - "createdTime", - "modifiedTime", - "version", - "originalFilename", - "capabilities", - "permissionIds", - "webViewLink", - "webContentLink", - ] - ) - - @staticmethod - def is_dir(record: dict) -> bool: - return record.get("mimeType") == "application/vnd.google-apps.folder" - - @staticmethod - def map_file_data(f: dict) -> FileData: - file_id = f["id"] - filename = f.pop("name") - url = f.pop("webContentLink", None) - version = f.pop("version", None) - permissions = f.pop("permissions", None) - date_created_str = f.pop("createdTime", None) - date_created_dt = parser.parse(date_created_str) if date_created_str else None - date_modified_str = f.pop("modifiedTime", None) - parent_path = f.pop("parent_path", None) - parent_root_path = f.pop("parent_root_path", None) - date_modified_dt = parser.parse(date_modified_str) if date_modified_str else None - if ( - parent_path - and isinstance(parent_path, str) - and parent_root_path - and isinstance(parent_root_path, str) - ): - fullpath = f"{parent_path}/{filename}" - rel_path = fullpath.replace(parent_root_path, "") - source_identifiers = SourceIdentifiers( - filename=filename, fullpath=fullpath, rel_path=rel_path 
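# get_files_service above accepts either an inline service-account key (dict) or a
# path to one; a hedged sketch of the dict branch using google-auth and
# google-api-python-client (key contents illustrative):
from google.oauth2 import service_account
from googleapiclient.discovery import build


def drive_files_client(key_info: dict):
    creds = service_account.Credentials.from_service_account_info(key_info)
    return build("drive", "v3", credentials=creds).files()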
- ) - else: - source_identifiers = SourceIdentifiers(fullpath=filename, filename=filename) - return FileData( - connector_type=CONNECTOR_TYPE, - identifier=file_id, - source_identifiers=source_identifiers, - metadata=DataSourceMetadata( - url=url, - version=version, - date_created=str(date_created_dt.timestamp()), - date_modified=str(date_modified_dt.timestamp()), - permissions_data=permissions, - record_locator={ - "file_id": file_id, - }, - ), - additional_metadata=f, - ) - - def get_paginated_results( - self, - files_client, - object_id: str, - extensions: Optional[list[str]] = None, - recursive: bool = False, - previous_path: Optional[str] = None, - ) -> list[dict]: - - fields_input = "nextPageToken, files({})".format(",".join(self.fields)) - q = f"'{object_id}' in parents" - # Filter by extension but still include any directories - if extensions: - ext_filter = " or ".join([f"fileExtension = '{e}'" for e in extensions]) - q = f"{q} and ({ext_filter} or mimeType = 'application/vnd.google-apps.folder')" - logger.debug(f"Query used when indexing: {q}") - logger.debug("response fields limited to: {}".format(", ".join(self.fields))) - done = False - page_token = None - files_response = [] - while not done: - response: dict = files_client.list( - spaces="drive", - fields=fields_input, - corpora="user", - pageToken=page_token, - q=q, - ).execute() - if files := response.get("files", []): - fs = [f for f in files if not self.is_dir(record=f)] - for r in fs: - r["parent_path"] = previous_path - dirs = [f for f in files if self.is_dir(record=f)] - files_response.extend(fs) - if recursive: - for d in dirs: - dir_id = d["id"] - dir_name = d["name"] - files_response.extend( - self.get_paginated_results( - files_client=files_client, - object_id=dir_id, - extensions=extensions, - recursive=recursive, - previous_path=f"{previous_path}/{dir_name}", - ) - ) - page_token = response.get("nextPageToken") - if page_token is None: - done = True - for r in files_response: - r["parent_root_path"] = previous_path - return files_response - - def get_root_info(self, files_client, object_id: str) -> dict: - return files_client.get(fileId=object_id, fields=",".join(self.fields)).execute() - - def get_files( - self, - files_client, - object_id: str, - recursive: bool = False, - extensions: Optional[list[str]] = None, - ) -> list[FileData]: - root_info = self.get_root_info(files_client=files_client, object_id=object_id) - if not self.is_dir(root_info): - data = [self.map_file_data(root_info)] - else: - - file_contents = self.get_paginated_results( - files_client=files_client, - object_id=object_id, - extensions=extensions, - recursive=recursive, - previous_path=root_info["name"], - ) - data = [self.map_file_data(f=f) for f in file_contents] - for d in data: - d.metadata.record_locator["drive_id"]: object_id - return data - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for f in self.get_files( - files_client=self.connection_config.get_files_service(), - object_id=self.connection_config.drive_id, - recursive=self.index_config.recursive, - extensions=self.index_config.extensions, - ): - yield f - - -@dataclass -class GoogleDriveDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class GoogleDriveDownloader(Downloader): - connection_config: GoogleDriveConnectionConfig - download_config: GoogleDriveDownloaderConfig = field( - default_factory=lambda: GoogleDriveDownloaderConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_download_path(self, file_data: FileData) -> Path: - 
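# Two notes on get_paginated_results/get_files above. First, Drive listing is
# driven by nextPageToken; a minimal loop over a files client like the one in the
# sketch above:
def list_all(files_client, q: str) -> list[dict]:
    results, page_token = [], None
    while True:
        response = files_client.list(
            q=q, pageToken=page_token, fields="nextPageToken, files(id, name, mimeType)"
        ).execute()
        results.extend(response.get("files", []))
        page_token = response.get("nextPageToken")
        if page_token is None:
            return results

# Second, the line `d.metadata.record_locator["drive_id"]: object_id` in get_files
# is a bare annotation rather than an assignment, so it never stores the drive id;
# `d.metadata.record_locator["drive_id"] = object_id` appears to be what was meant.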
rel_path = file_data.source_identifiers.relative_path - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - return self.download_dir / Path(rel_path) - - @SourceConnectionNetworkError.wrap - def _get_content(self, downloader: "MediaIoBaseDownload") -> bool: - downloaded = False - while downloaded is False: - _, downloaded = downloader.next_chunk() - return downloaded - - def _write_file(self, file_data: FileData, file_contents: io.BytesIO): - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"writing {file_data.source_identifiers.fullpath} to {download_path}") - with open(download_path, "wb") as handler: - handler.write(file_contents.getbuffer()) - return self.generate_download_response(file_data=file_data, download_path=download_path) - - @requires_dependencies(["googleapiclient"], extras="google-drive") - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - from googleapiclient.http import MediaIoBaseDownload - - logger.debug(f"fetching file: {file_data.source_identifiers.fullpath}") - mime_type = file_data.additional_metadata["mimeType"] - record_id = file_data.identifier - files_client = self.connection_config.get_files_service() - if mime_type.startswith("application/vnd.google-apps"): - export_mime = GOOGLE_DRIVE_EXPORT_TYPES.get( - self.meta.get("mimeType"), # type: ignore - ) - if not export_mime: - raise TypeError( - f"File not supported. Name: {file_data.source_identifiers.filename} " - f"ID: {record_id} " - f"MimeType: {mime_type}" - ) - - request = files_client.export_media( - fileId=record_id, - mimeType=export_mime, - ) - else: - request = files_client.get_media(fileId=record_id) - - file_contents = io.BytesIO() - downloader = MediaIoBaseDownload(file_contents, request) - downloaded = self._get_content(downloader=downloader) - if not downloaded or not file_contents: - return [] - return self._write_file(file_data=file_data, file_contents=file_contents) - - -google_drive_source_entry = SourceRegistryEntry( - connection_config=GoogleDriveConnectionConfig, - indexer_config=GoogleDriveIndexerConfig, - indexer=GoogleDriveIndexer, - downloader_config=GoogleDriveDownloaderConfig, - downloader=GoogleDriveDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/local.py b/unstructured/ingest/v2/processes/connectors/local.py deleted file mode 100644 index 811606d79..000000000 --- a/unstructured/ingest/v2/processes/connectors/local.py +++ /dev/null @@ -1,203 +0,0 @@ -import glob -import itertools -import shutil -from dataclasses import dataclass, field -from pathlib import Path -from time import time -from typing import Any, Generator, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - UploadContent, - Uploader, - UploaderConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) - -CONNECTOR_TYPE = "local" - - -@dataclass -class LocalAccessConfig(AccessConfig): - pass - - -@dataclass -class LocalConnectionConfig(ConnectionConfig): - access_config: LocalAccessConfig = field(default_factory=lambda: LocalAccessConfig()) - - -@dataclass -class LocalIndexerConfig(IndexerConfig): - input_path: str - recursive: bool 
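# run() above exports Google-native files and streams everything else through
# MediaIoBaseDownload. Note the export branch reads `self.meta.get("mimeType")`
# even though no `meta` attribute is defined on the downloader; the locally
# computed `mime_type` appears to be the intended lookup key. A hedged sketch of
# the streaming loop:
import io

from googleapiclient.http import MediaIoBaseDownload


def download_bytes(files_client, record_id: str) -> io.BytesIO:
    request = files_client.get_media(fileId=record_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()  # returns (status, done)
    return buffer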
= False - file_glob: Optional[list[str]] = None - - @property - def path(self) -> Path: - return Path(self.input_path).resolve() - - -@dataclass -class LocalIndexer(Indexer): - index_config: LocalIndexerConfig - connection_config: LocalConnectionConfig = field( - default_factory=lambda: LocalConnectionConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def list_files(self) -> list[Path]: - input_path = self.index_config.path - if input_path.is_file(): - return [Path(s) for s in glob.glob(f"{self.index_config.path}")] - glob_fn = input_path.rglob if self.index_config.recursive else input_path.glob - if not self.index_config.file_glob: - return list(glob_fn("*")) - return list( - itertools.chain.from_iterable( - glob_fn(pattern) for pattern in self.index_config.file_glob - ) - ) - - def get_file_metadata(self, path: Path) -> DataSourceMetadata: - stats = path.stat() - try: - date_modified = str(stats.st_mtime) - except Exception as e: - logger.warning(f"Couldn't detect date modified: {e}") - date_modified = None - - try: - date_created = str(stats.st_birthtime) - except Exception as e: - logger.warning(f"Couldn't detect date created: {e}") - date_created = None - - try: - mode = stats.st_mode - permissions_data = [{"mode": mode}] - except Exception as e: - logger.warning(f"Couldn't detect file mode: {e}") - permissions_data = None - return DataSourceMetadata( - date_modified=date_modified, - date_created=date_created, - date_processed=str(time()), - permissions_data=permissions_data, - record_locator={"path": str(path.resolve())}, - ) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for file_path in self.list_files(): - file_data = FileData( - identifier=str(file_path.resolve()), - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - fullpath=str(file_path.resolve()), - filename=file_path.name, - rel_path=( - str(file_path.resolve()).replace(str(self.index_config.path.resolve()), "")[ - 1: - ] - if not self.index_config.path.is_file() - else self.index_config.path.name - ), - ), - metadata=self.get_file_metadata(path=file_path), - ) - yield file_data - - -@dataclass -class LocalDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class LocalDownloader(Downloader): - connector_type: str = CONNECTOR_TYPE - connection_config: LocalConnectionConfig = field( - default_factory=lambda: LocalConnectionConfig() - ) - download_config: LocalDownloaderConfig = field(default_factory=lambda: LocalDownloaderConfig()) - - def get_download_path(self, file_data: FileData) -> Path: - return Path(file_data.source_identifiers.fullpath) - - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - return DownloadResponse( - file_data=file_data, path=Path(file_data.source_identifiers.fullpath) - ) - - -@dataclass -class LocalUploaderConfig(UploaderConfig): - output_dir: str = field(default="structured-output") - - @property - def output_path(self) -> Path: - return Path(self.output_dir).resolve() - - def __post_init__(self): - if self.output_path.exists() and self.output_path.is_file(): - raise ValueError("output path already exists as a file") - - -@dataclass -class LocalUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: LocalUploaderConfig = field(default_factory=lambda: LocalUploaderConfig()) - connection_config: LocalConnectionConfig = field( - default_factory=lambda: LocalConnectionConfig() - ) - - def is_async(self) -> bool: - return False - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - 
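# list_files above picks glob vs. rglob based on the recursive flag and chains one
# generator per pattern; a stdlib-only sketch:
import itertools
from pathlib import Path
from typing import Optional


def list_files(root: Path, patterns: Optional[list[str]], recursive: bool) -> list[Path]:
    glob_fn = root.rglob if recursive else root.glob
    if not patterns:
        return list(glob_fn("*"))
    return list(itertools.chain.from_iterable(glob_fn(p) for p in patterns))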
self.upload_config.output_path.mkdir(parents=True, exist_ok=True) - for content in contents: - if source_identifiers := content.file_data.source_identifiers: - identifiers = source_identifiers - rel_path = ( - identifiers.relative_path[1:] - if identifiers.relative_path.startswith("/") - else identifiers.relative_path - ) - new_path = self.upload_config.output_path / Path(rel_path) - final_path = str(new_path).replace( - identifiers.filename, f"{identifiers.filename}.json" - ) - else: - final_path = self.upload_config.output_path / Path( - f"{content.file_data.identifier}.json" - ) - Path(final_path).parent.mkdir(parents=True, exist_ok=True) - logger.debug(f"copying file from {content.path} to {final_path}") - shutil.copy(src=str(content.path), dst=str(final_path)) - - -local_source_entry = SourceRegistryEntry( - indexer=LocalIndexer, - indexer_config=LocalIndexerConfig, - downloader=LocalDownloader, - downloader_config=LocalDownloaderConfig, - connection_config=LocalConnectionConfig, -) - -local_destination_entry = DestinationRegistryEntry( - uploader=LocalUploader, uploader_config=LocalUploaderConfig -) diff --git a/unstructured/ingest/v2/processes/connectors/mongodb.py b/unstructured/ingest/v2/processes/connectors/mongodb.py deleted file mode 100644 index f5003911c..000000000 --- a/unstructured/ingest/v2/processes/connectors/mongodb.py +++ /dev/null @@ -1,137 +0,0 @@ -import json -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.__version__ import __version__ as unstructured_version -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from pymongo import MongoClient - -CONNECTOR_TYPE = "mongodb" -SERVER_API_VERSION = "1" - - -@dataclass -class MongoDBAccessConfig(AccessConfig): - uri: Optional[str] = None - - -@dataclass -class MongoDBConnectionConfig(ConnectionConfig): - access_config: MongoDBAccessConfig = enhanced_field( - sensitive=True, default_factory=MongoDBAccessConfig - ) - host: Optional[str] = None - database: Optional[str] = None - collection: Optional[str] = None - port: int = 27017 - batch_size: int = 100 - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class MongoDBUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class MongoDBUploadStager(UploadStager): - upload_stager_config: MongoDBUploadStagerConfig = field( - default_factory=lambda: MongoDBUploadStagerConfig() - ) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(elements_contents, output_file) - return output_path - - -@dataclass -class MongoDBUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class MongoDBUploader(Uploader): - upload_config: MongoDBUploaderConfig - connection_config: 
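# The uploader above mirrors each source file's relative path under output_dir and
# appends ".json"; a stdlib sketch of that mapping (paths illustrative):
from pathlib import Path


def output_path_for(output_dir: Path, rel_path: str, filename: str) -> Path:
    rel_path = rel_path.lstrip("/")
    return output_dir / Path(rel_path).with_name(f"{filename}.json")


assert output_path_for(Path("structured-output"), "/a/b/doc.pdf", "doc.pdf") == Path(
    "structured-output/a/b/doc.pdf.json"
)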
MongoDBConnectionConfig - client: Optional["MongoClient"] = field(init=False) - connector_type: str = CONNECTOR_TYPE - - def __post_init__(self): - self.client = self.create_client() - - @requires_dependencies(["pymongo"], extras="mongodb") - def create_client(self) -> "MongoClient": - from pymongo import MongoClient - from pymongo.driver_info import DriverInfo - from pymongo.server_api import ServerApi - - if self.connection_config.access_config.uri: - return MongoClient( - self.connection_config.access_config.uri, - server_api=ServerApi(version=SERVER_API_VERSION), - driver=DriverInfo(name="unstructured", version=unstructured_version), - ) - else: - return MongoClient( - host=self.connection_config.host, - port=self.connection_config.port, - server_api=ServerApi(version=SERVER_API_VERSION), - ) - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"db, {self.connection_config.database}, " - f"collection {self.connection_config.collection} " - f"at {self.connection_config.host}", - ) - db = self.client[self.connection_config.database] - collection = db[self.connection_config.collection] - for chunk in batch_generator(elements_dict, self.upload_config.batch_size): - collection.insert_many(chunk) - - -mongodb_destination_entry = DestinationRegistryEntry( - connection_config=MongoDBConnectionConfig, - uploader=MongoDBUploader, - uploader_config=MongoDBUploaderConfig, - upload_stager=MongoDBUploadStager, - upload_stager_config=MongoDBUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/onedrive.py b/unstructured/ingest/v2/processes/connectors/onedrive.py deleted file mode 100644 index 4769cf626..000000000 --- a/unstructured/ingest/v2/processes/connectors/onedrive.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import dataclass, field -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional - -from dateutil import parser - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionError, SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from office365.graph_client import GraphClient - from office365.onedrive.driveitems.driveItem import DriveItem - -CONNECTOR_TYPE = "onedrive" -MAX_MB_SIZE = 512_000_000 - - -@dataclass -class OnedriveAccessConfig(AccessConfig): - client_cred: str - - -@dataclass -class OnedriveConnectionConfig(ConnectionConfig): - client_id: str - user_pname: str - tenant: str = field(repr=False) - authority_url: Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - access_config: OnedriveAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["msal"], extras="onedrive") - def get_token(self): - from msal 
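# create_client above prefers a full connection URI and otherwise falls back to
# host/port, always pinning the stable server API; batched writes then go through
# insert_many. A hedged pymongo sketch (connection values illustrative):
from pymongo import MongoClient
from pymongo.server_api import ServerApi


def make_client(uri=None, host=None, port=27017) -> MongoClient:
    if uri:
        return MongoClient(uri, server_api=ServerApi("1"))
    return MongoClient(host=host, port=port, server_api=ServerApi("1"))


def batches(items: list, size: int):
    for i in range(0, len(items), size):
        yield items[i : i + size]

# client = make_client(uri="mongodb+srv://user:pass@cluster/db")  # illustrative
# for chunk in batches(elements, 100):
#     client["db"]["collection"].insert_many(chunk)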
import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.authority_url}/{self.tenant}", - client_id=self.client_id, - client_credential=self.access_config.client_cred, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for OneDrive") - raise exc - if "error" in token: - raise SourceConnectionNetworkError( - "failed to fetch token, {}: {}".format(token["error"], token["error_description"]) - ) - return token - - @requires_dependencies(["office365"], extras="onedrive") - def get_client(self) -> "GraphClient": - from office365.graph_client import GraphClient - - client = GraphClient(self.get_token) - return client - - -@dataclass -class OnedriveIndexerConfig(IndexerConfig): - path: Optional[str] = field(default="") - recursive: bool = False - - -@dataclass -class OnedriveIndexer(Indexer): - connection_config: OnedriveConnectionConfig - index_config: OnedriveIndexerConfig - - def list_objects(self, folder: DriveItem, recursive: bool) -> list[DriveItem]: - drive_items: list[DriveItem] = list(folder.children.get().execute_query()) - files = [d for d in drive_items if d.is_file] - if not recursive: - return files - folders = [d for d in drive_items if d.is_folder] - for f in folders: - files.extend(self.list_objects(f, recursive)) - return files - - def get_root(self, client: "GraphClient") -> "DriveItem": - root = client.users[self.connection_config.user_pname].drive.get().execute_query().root - if fpath := self.index_config.path: - root = root.get_by_path(fpath).get().execute_query() - if root is None or not root.is_folder: - raise ValueError(f"Unable to find directory, given: {fpath}") - return root - - def get_properties(self, drive_item: "DriveItem") -> dict: - properties = drive_item.properties - filtered_properties = {} - for k, v in properties.items(): - try: - json.dumps(v) - filtered_properties[k] = v - except TypeError: - pass - return filtered_properties - - def drive_item_to_file_data(self, drive_item: "DriveItem") -> FileData: - file_path = drive_item.parent_reference.path.split(":")[-1] - file_path = file_path[1:] if file_path and file_path[0] == "/" else file_path - filename = drive_item.name - server_path = file_path + "/" + filename - rel_path = server_path.replace(self.index_config.path, "").lstrip("/") - date_modified_dt = ( - parser.parse(str(drive_item.last_modified_datetime)) - if drive_item.last_modified_datetime - else None - ) - date_created_at = ( - parser.parse(str(drive_item.created_datetime)) if drive_item.created_datetime else None - ) - return FileData( - identifier=drive_item.id, - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - fullpath=server_path, filename=drive_item.name, rel_path=rel_path - ), - metadata=DataSourceMetadata( - url=drive_item.parent_reference.path + "/" + drive_item.name, - version=drive_item.etag, - date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None, - date_created=str(date_created_at.timestamp()) if date_created_at else None, - date_processed=str(time()), - record_locator={ - "user_pname": self.connection_config.user_pname, - "server_relative_path": server_path, - }, - ), - additional_metadata=self.get_properties(drive_item=drive_item), - ) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - client = self.connection_config.get_client() - root = self.get_root(client=client) - drive_items = 
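# get_token above is a standard MSAL client-credentials flow against the Graph
# default scope; a hedged sketch (tenant and secrets illustrative):
from msal import ConfidentialClientApplication

app = ConfidentialClientApplication(
    client_id="my-client-id",
    client_credential="my-client-secret",
    authority="https://login.microsoftonline.com/my-tenant",
)
token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
if "error" in token:
    raise RuntimeError("failed to fetch token: {}".format(token.get("error_description")))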
self.list_objects(folder=root, recursive=self.index_config.recursive) - for drive_item in drive_items: - file_data = self.drive_item_to_file_data(drive_item=drive_item) - yield file_data - - -@dataclass -class OnedriveDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class OnedriveDownloader(Downloader): - connection_config: OnedriveConnectionConfig - download_config: OnedriveDownloaderConfig - - @SourceConnectionNetworkError.wrap - def _fetch_file(self, file_data: FileData): - if file_data.source_identifiers is None or not file_data.source_identifiers.fullpath: - raise ValueError( - f"file data doesn't have enough information to get " - f"file content: {file_data.to_dict()}" - ) - - server_relative_path = file_data.source_identifiers.fullpath - client = self.connection_config.get_client() - root = client.users[self.connection_config.user_pname].drive.get().execute_query().root - file = root.get_by_path(server_relative_path).get().execute_query() - if not file: - raise FileNotFoundError(f"file not found: {server_relative_path}") - return file - - def get_download_path(self, file_data: FileData) -> Optional[Path]: - rel_path = file_data.source_identifiers.relative_path - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - return self.download_dir / Path(rel_path) - - @SourceConnectionError.wrap - def run(self, file_data: FileData, **kwargs: Any) -> download_responses: - file = self._fetch_file(file_data=file_data) - fsize = file.get_property("size", 0) - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - logger.info(f"Downloading {file_data.source_identifiers.fullpath} to {download_path}") - if fsize > MAX_MB_SIZE: - logger.info(f"Downloading file with size: {fsize} bytes in chunks") - with download_path.open(mode="wb") as f: - file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query() - else: - with download_path.open(mode="wb") as f: - file.download(f).execute_query() - return DownloadResponse(file_data=file_data, path=download_path) - - -onedrive_source_entry = SourceRegistryEntry( - connection_config=OnedriveConnectionConfig, - indexer_config=OnedriveIndexerConfig, - indexer=OnedriveIndexer, - downloader_config=OnedriveDownloaderConfig, - downloader=OnedriveDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/opensearch.py b/unstructured/ingest/v2/processes/connectors/opensearch.py deleted file mode 100644 index 0933cd1fa..000000000 --- a/unstructured/ingest/v2/processes/connectors/opensearch.py +++ /dev/null @@ -1,155 +0,0 @@ -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Optional - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.error import ( - DestinationConnectionError, -) -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, - SourceRegistryEntry, -) -from unstructured.ingest.v2.processes.connectors.elasticsearch import ( - ElasticsearchDownloader, - ElasticsearchDownloaderConfig, - ElasticsearchIndexer, - ElasticsearchIndexerConfig, - ElasticsearchUploader, - ElasticsearchUploaderConfig, - ElasticsearchUploadStager, - ElasticsearchUploadStagerConfig, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from opensearchpy import OpenSearch - 
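# The OneDrive downloader above switches to a chunked download_session once a file
# exceeds MAX_MB_SIZE; a hedged sketch of that branch, reusing the office365 calls
# exactly as the deleted code makes them:
def save_drive_item(drive_item, download_path, max_bytes=512_000_000):
    size = drive_item.get_property("size", 0)
    with download_path.open(mode="wb") as f:
        if size > max_bytes:
            # stream in 100 MB chunks instead of buffering the whole file
            drive_item.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
        else:
            drive_item.download(f).execute_query()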
-CONNECTOR_TYPE = "opensearch" - -"""Since the actual OpenSearch project is a fork of Elasticsearch, we are relying -heavily on the Elasticsearch connector code, inheriting the functionality as much as possible.""" - - -@dataclass -class OpenSearchAccessConfig(AccessConfig): - password: Optional[str] = enhanced_field(default=None, sensitive=True) - use_ssl: bool = False - verify_certs: bool = False - ssl_show_warn: bool = False - ca_certs: Optional[str] = None - client_cert: Optional[str] = None - client_key: Optional[str] = None - - -@dataclass -class OpenSearchClientInput(EnhancedDataClassJsonMixin): - http_auth: Optional[tuple[str, str]] = enhanced_field(sensitive=True, default=None) - hosts: Optional[list[str]] = None - use_ssl: bool = False - verify_certs: bool = False - ssl_show_warn: bool = False - ca_certs: Optional[str] = None - client_cert: Optional[str] = None - client_key: Optional[str] = None - - -@dataclass -class OpenSearchConnectionConfig(ConnectionConfig): - hosts: Optional[list[str]] = None - username: Optional[str] = None - access_config: OpenSearchAccessConfig = enhanced_field(sensitive=True) - - def get_client_kwargs(self) -> dict: - # Update auth related fields to conform to what the SDK expects based on the - # supported methods: - # https://github.com/opensearch-project/opensearch-py/blob/main/opensearchpy/client/__init__.py - client_input = OpenSearchClientInput() - if self.hosts: - client_input.hosts = self.hosts - if self.access_config.use_ssl: - client_input.use_ssl = self.access_config.use_ssl - if self.access_config.verify_certs: - client_input.verify_certs = self.access_config.verify_certs - if self.access_config.ssl_show_warn: - client_input.ssl_show_warn = self.access_config.ssl_show_warn - if self.access_config.ca_certs: - client_input.ca_certs = self.access_config.ca_certs - if self.access_config.client_cert: - client_input.client_cert = self.access_config.client_cert - if self.access_config.client_key: - client_input.client_key = self.access_config.client_key - if self.username and self.access_config.password: - client_input.http_auth = (self.username, self.access_config.password) - logger.debug( - f"OpenSearch client inputs mapped to: {client_input.to_dict(redact_sensitive=True)}" - ) - client_kwargs = client_input.to_dict(redact_sensitive=False) - client_kwargs = {k: v for k, v in client_kwargs.items() if v is not None} - return client_kwargs - - @DestinationConnectionError.wrap - @requires_dependencies(["opensearchpy"], extras="opensearch") - def get_client(self) -> "OpenSearch": - from opensearchpy import OpenSearch - - return OpenSearch(**self.get_client_kwargs()) - - -@dataclass -class OpenSearchIndexer(ElasticsearchIndexer): - connection_config: OpenSearchConnectionConfig - client: "OpenSearch" = field(init=False) - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def load_scan(self): - from opensearchpy.helpers import scan - - return scan - - -@dataclass -class OpenSearchDownloader(ElasticsearchDownloader): - connection_config: OpenSearchConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def load_async(self): - from opensearchpy import AsyncOpenSearch - from opensearchpy.helpers import async_scan - - return AsyncOpenSearch, async_scan - - -@dataclass -class OpenSearchUploader(ElasticsearchUploader): - connection_config: OpenSearchConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["opensearchpy"], extras="opensearch") - def 
load_parallel_bulk(self): - from opensearchpy.helpers import parallel_bulk - - return parallel_bulk - - -opensearch_source_entry = SourceRegistryEntry( - connection_config=OpenSearchConnectionConfig, - indexer=OpenSearchIndexer, - indexer_config=ElasticsearchIndexerConfig, - downloader=OpenSearchDownloader, - downloader_config=ElasticsearchDownloaderConfig, -) - - -opensearch_destination_entry = DestinationRegistryEntry( - connection_config=OpenSearchConnectionConfig, - upload_stager_config=ElasticsearchUploadStagerConfig, - upload_stager=ElasticsearchUploadStager, - uploader_config=ElasticsearchUploaderConfig, - uploader=OpenSearchUploader, -) diff --git a/unstructured/ingest/v2/processes/connectors/pinecone.py b/unstructured/ingest/v2/processes/connectors/pinecone.py deleted file mode 100644 index 0cd087b9c..000000000 --- a/unstructured/ingest/v2/processes/connectors/pinecone.py +++ /dev/null @@ -1,178 +0,0 @@ -import json -import multiprocessing as mp -import uuid -from dataclasses import dataclass, field -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import DestinationConnectionError -from unstructured.ingest.utils.data_prep import batch_generator -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.staging.base import flatten_dict -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from pinecone import Index as PineconeIndex - - -CONNECTOR_TYPE = "pinecone" - - -@dataclass -class PineconeAccessConfig(AccessConfig): - api_key: Optional[str] = enhanced_field(default=None, overload_name="pinecone_api_key") - - -@dataclass -class PineconeConnectionConfig(ConnectionConfig): - index_name: str - environment: str - access_config: PineconeAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["pinecone"], extras="pinecone") - def get_index(self) -> "PineconeIndex": - from pinecone import Pinecone - - from unstructured import __version__ as unstructured_version - - pc = Pinecone( - api_key=self.access_config.api_key, - source_tag=f"unstructured=={unstructured_version}", - ) - - index = pc.Index(self.index_name) - logger.debug(f"Connected to index: {pc.describe_index(self.index_name)}") - return index - - -@dataclass -class PineconeUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class PineconeUploaderConfig(UploaderConfig): - batch_size: int = 100 - num_of_processes: int = 4 - - -@dataclass -class PineconeUploadStager(UploadStager): - upload_stager_config: PineconeUploadStagerConfig = field( - default_factory=lambda: PineconeUploadStagerConfig() - ) - - @staticmethod - def conform_dict(element_dict: dict) -> dict: - # While flatten_dict enables indexing on various fields, - # element_serialized enables easily reloading the element object to memory. - # element_serialized is formed without text/embeddings to avoid data bloating. 
- return { - "id": str(uuid.uuid4()), - "values": element_dict.pop("embeddings", None), - "metadata": { - "text": element_dict.pop("text", None), - "element_serialized": json.dumps(element_dict), - **flatten_dict( - element_dict, - separator="-", - flatten_lists=True, - remove_none=True, - ), - }, - } - - def run( - self, - elements_filepath: Path, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - - conformed_elements = [ - self.conform_dict(element_dict=element) for element in elements_contents - ] - - output_path = Path(output_dir) / Path(f"{output_filename}.json") - output_path.parent.mkdir(parents=True, exist_ok=True) - - with open(output_path, "w") as output_file: - json.dump(conformed_elements, output_file) - return output_path - - -@dataclass -class PineconeUploader(Uploader): - upload_config: PineconeUploaderConfig - connection_config: PineconeConnectionConfig - connector_type: str = CONNECTOR_TYPE - - @DestinationConnectionError.wrap - def check_connection(self): - _ = self.connection_config.get_index() - - @requires_dependencies(["pinecone"], extras="pinecone") - def upsert_batch(self, batch): - from pinecone.exceptions import PineconeApiException - - try: - index = self.connection_config.get_index() - response = index.upsert(batch) - except PineconeApiException as api_error: - raise DestinationConnectionError(f"http error: {api_error}") from api_error - logger.debug(f"results: {response}") - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing document batches to destination" - f" index named {self.connection_config.index_name}" - f" environment named {self.connection_config.environment}" - f" with batch size {self.upload_config.batch_size}" - f" with {self.upload_config.num_of_processes} (number of) processes" - ) - - pinecone_batch_size = self.upload_config.batch_size - - if self.upload_config.num_of_processes == 1: - for batch in batch_generator(elements_dict, pinecone_batch_size): - self.upsert_batch(batch) # noqa: E203 - - else: - with mp.Pool( - processes=self.upload_config.num_of_processes, - ) as pool: - pool.map( - self.upsert_batch, list(batch_generator(elements_dict, pinecone_batch_size)) - ) - - -pinecone_destination_entry = DestinationRegistryEntry( - connection_config=PineconeConnectionConfig, - uploader=PineconeUploader, - uploader_config=PineconeUploaderConfig, - upload_stager=PineconeUploadStager, - upload_stager_config=PineconeUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/salesforce.py b/unstructured/ingest/v2/processes/connectors/salesforce.py deleted file mode 100644 index e1f018895..000000000 --- a/unstructured/ingest/v2/processes/connectors/salesforce.py +++ /dev/null @@ -1,293 +0,0 @@ -""" -Salesforce Connector -Able to download Account, Case, Campaign, EmailMessage, Lead -Salesforce returns everything as a list of json. -This saves each entry as a separate file to be partitioned. 
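# The stager above reshapes every element into Pinecone's record layout
# ({id, values, metadata}) before the uploader fans batches out over a process
# pool; a hedged sketch of the record shape (flatten_dict omitted for brevity):
import json
import uuid


def conform(element: dict) -> dict:
    return {
        "id": str(uuid.uuid4()),
        "values": element.pop("embeddings", None),
        "metadata": {
            "text": element.pop("text", None),
            "element_serialized": json.dumps(element),
        },
    }


record = conform({"text": "hello", "embeddings": [0.1, 0.2], "type": "NarrativeText"})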
-Using JWT authorization -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_key_and_cert.htm -https://developer.salesforce.com/docs/atlas.en-us.sfdx_dev.meta/sfdx_dev/sfdx_dev_auth_connected_app.htm -""" - -import json -from collections import OrderedDict -from dataclasses import dataclass, field -from email.utils import formatdate -from pathlib import Path -from string import Template -from textwrap import dedent -from typing import TYPE_CHECKING, Any, Generator, Type - -from dateutil import parser - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - - -class MissingCategoryError(Exception): - """There are no categories with that name.""" - - -CONNECTOR_TYPE = "salesforce" - -if TYPE_CHECKING: - from simple_salesforce import Salesforce - -SALESFORCE_API_VERSION = "57.0" - -# TODO: Add more categories as needed -ACCEPTED_CATEGORIES: list[str] = ["Account", "Case", "Campaign", "EmailMessage", "Lead"] - -# Generic minimal email template used only -# to process EmailMessage records as .eml files -EMAIL_TEMPLATE = Template( - """MIME-Version: 1.0 -Date: $date -Message-ID: $message_identifier -Subject: $subject -From: $from_email -To: $to_email -Content-Type: multipart/alternative; boundary="00000000000095c9b205eff92630" ---00000000000095c9b205eff92630 -Content-Type: text/plain; charset="UTF-8" -$textbody ---00000000000095c9b205eff92630 -Content-Type: text/html; charset="UTF-8" -$htmlbody ---00000000000095c9b205eff92630-- -""", -) - - -@dataclass -class SalesforceAccessConfig(AccessConfig): - consumer_key: str - private_key: str - - @requires_dependencies(["cryptography"]) - def get_private_key_value_and_type(self) -> tuple[str, Type]: - from cryptography.hazmat.primitives import serialization - - try: - serialization.load_pem_private_key(data=self.private_key.encode("utf-8"), password=None) - except ValueError: - pass - else: - return self.private_key, str - - if Path(self.private_key).is_file(): - return self.private_key, Path - - raise ValueError("private_key does not contain PEM private key or path") - - -@dataclass -class SalesforceConnectionConfig(ConnectionConfig): - username: str - access_config: SalesforceAccessConfig = enhanced_field(sensitive=True) - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def get_client(self) -> "Salesforce": - from simple_salesforce import Salesforce - - pkey_value, pkey_type = self.access_config.get_private_key_value_and_type() - - return Salesforce( - username=self.username, - consumer_key=self.access_config.consumer_key, - privatekey_file=pkey_value if pkey_type is Path else None, - privatekey=pkey_value if pkey_type is str else None, - version=SALESFORCE_API_VERSION, - ) - - -@dataclass -class SalesforceIndexerConfig(IndexerConfig): - categories: list[str] - - -@dataclass -class SalesforceIndexer(Indexer): - connection_config: SalesforceConnectionConfig - index_config: SalesforceIndexerConfig - - def __post_init__(self): - for record_type in 
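# get_private_key_value_and_type below probes whether the configured private_key
# is inline PEM or a filesystem path; the same probe in isolation:
from pathlib import Path
from typing import Tuple, Type

from cryptography.hazmat.primitives import serialization


def classify_private_key(private_key: str) -> Tuple[str, Type]:
    try:
        serialization.load_pem_private_key(data=private_key.encode("utf-8"), password=None)
        return private_key, str
    except ValueError:
        pass
    if Path(private_key).is_file():
        return private_key, Path
    raise ValueError("private_key does not contain PEM private key or path")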
self.index_config.categories: - if record_type not in ACCEPTED_CATEGORIES: - raise ValueError(f"{record_type} not currently an accepted Salesforce category") - - def get_file_extension(self, record_type) -> str: - if record_type == "EmailMessage": - extension = ".eml" - elif record_type in ["Account", "Lead", "Case", "Campaign"]: - extension = ".xml" - else: - raise MissingCategoryError( - f"There are no categories with the name: {record_type}", - ) - return extension - - @requires_dependencies(["simple_salesforce"], extras="salesforce") - def list_files(self) -> list[FileData]: - """Get Salesforce Ids for the records. - Send them to next phase where each doc gets downloaded into the - appropriate format for partitioning. - """ - from simple_salesforce.exceptions import SalesforceMalformedRequest - - client = self.connection_config.get_client() - - files_list = [] - for record_type in self.index_config.categories: - try: - # Get ids from Salesforce - records = client.query_all_iter( - f"select Id, SystemModstamp, CreatedDate, LastModifiedDate from {record_type}", - ) - for record in records: - record_with_extension = record["Id"] + self.get_file_extension( - record["attributes"]["type"] - ) - files_list.append( - FileData( - connector_type=CONNECTOR_TYPE, - identifier=record["Id"], - source_identifiers=SourceIdentifiers( - filename=record_with_extension, - fullpath=f"{record['attributes']['type']}/{record_with_extension}", - ), - metadata=DataSourceMetadata( - url=record["attributes"]["url"], - version=str(parser.parse(record["SystemModstamp"]).timestamp()), - date_created=str(parser.parse(record["CreatedDate"]).timestamp()), - date_modified=str( - parser.parse(record["LastModifiedDate"]).timestamp() - ), - record_locator={"id": record["Id"]}, - ), - additional_metadata={"record_type": record["attributes"]["type"]}, - ) - ) - except SalesforceMalformedRequest as e: - raise SalesforceMalformedRequest(f"Problem with Salesforce query: {e}") - - return files_list - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - for f in self.list_files(): - yield f - - -@dataclass -class SalesforceDownloaderConfig(DownloaderConfig): - pass - - -@dataclass -class SalesforceDownloader(Downloader): - connection_config: SalesforceConnectionConfig - download_config: SalesforceDownloaderConfig = field( - default_factory=lambda: SalesforceDownloaderConfig() - ) - connector_type: str = CONNECTOR_TYPE - - def get_download_path(self, file_data: FileData) -> Path: - rel_path = file_data.source_identifiers.relative_path - rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path - return self.download_dir / Path(rel_path) - - def _xml_for_record(self, record: OrderedDict) -> str: - """Creates partitionable xml file from a record""" - import xml.etree.ElementTree as ET - - def create_xml_doc(data, parent, prefix=""): - for key, value in data.items(): - if isinstance(value, OrderedDict): - create_xml_doc(value, parent, prefix=f"{prefix}{key}.") - else: - item = ET.Element("item") - item.text = f"{prefix}{key}: {value}" - parent.append(item) - - root = ET.Element("root") - create_xml_doc(record, root) - - xml_string = ET.tostring(root, encoding="utf-8", xml_declaration=True).decode() - return xml_string - - def _eml_for_record(self, email_json: dict[str, Any]) -> str: - """Recreates standard expected .eml format using template.""" - eml = EMAIL_TEMPLATE.substitute( - date=formatdate(parser.parse(email_json.get("MessageDate")).timestamp()), - 
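# list_files above walks each accepted category with one SOQL query and keeps only
# the fields needed to build FileData; a hedged sketch using simple_salesforce
# (credentials illustrative):
# from simple_salesforce import Salesforce
# sf = Salesforce(username="user@example.com", consumer_key="...", privatekey="...")
record_type = "Account"  # one of the ACCEPTED_CATEGORIES above
soql = f"select Id, SystemModstamp, CreatedDate, LastModifiedDate from {record_type}"
# for record in sf.query_all_iter(soql):
#     print(record["Id"], record["attributes"]["type"])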
message_identifier=email_json.get("MessageIdentifier"), - subject=email_json.get("Subject"), - from_email=email_json.get("FromAddress"), - to_email=email_json.get("ToAddress"), - textbody=email_json.get("TextBody"), - htmlbody=email_json.get("HtmlBody"), - ) - return dedent(eml) - - @SourceConnectionNetworkError.wrap - def _get_response(self, file_data: FileData) -> OrderedDict: - client = self.connection_config.get_client() - return client.query( - f"select FIELDS(STANDARD) from {file_data.additional_metadata['record_type']} where Id='{file_data.identifier}'", # noqa: E501 - ) - - def get_record(self, file_data: FileData) -> OrderedDict: - # Get record from Salesforce based on id - response = self._get_response(file_data) - logger.debug(f"response was returned for salesforce record id: {file_data.identifier}") - records = response["records"] - if not records: - raise ValueError( - f"No record found with record id {file_data.identifier}: {json.dumps(response)}" - ) - record_json = records[0] - return record_json - - def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse: - record = self.get_record(file_data) - - try: - if file_data.additional_metadata["record_type"] == "EmailMessage": - document = self._eml_for_record(record) - else: - document = self._xml_for_record(record) - download_path = self.get_download_path(file_data=file_data) - download_path.parent.mkdir(parents=True, exist_ok=True) - - with open(download_path, "w") as page_file: - page_file.write(document) - - except Exception as e: - logger.error(f"failed to download file {file_data.identifier}: {e}", exc_info=True) - raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}") - - return self.generate_download_response(file_data=file_data, download_path=download_path) - - -salesforce_source_entry = SourceRegistryEntry( - connection_config=SalesforceConnectionConfig, - indexer_config=SalesforceIndexerConfig, - indexer=SalesforceIndexer, - downloader_config=SalesforceDownloaderConfig, - downloader=SalesforceDownloader, -) diff --git a/unstructured/ingest/v2/processes/connectors/sharepoint.py b/unstructured/ingest/v2/processes/connectors/sharepoint.py deleted file mode 100644 index 696d327ce..000000000 --- a/unstructured/ingest/v2/processes/connectors/sharepoint.py +++ /dev/null @@ -1,411 +0,0 @@ -import json -from dataclasses import dataclass, field -from enum import Enum -from pathlib import Path -from time import time -from typing import TYPE_CHECKING, Any, Generator, Optional -from urllib.parse import quote - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.error import SourceConnectionNetworkError -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - Downloader, - DownloaderConfig, - DownloadResponse, - FileData, - Indexer, - IndexerConfig, - SourceIdentifiers, - download_responses, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - SourceRegistryEntry, -) -from unstructured.utils import requires_dependencies - -from .utils import parse_datetime - -if TYPE_CHECKING: - from office365.graph_client import GraphClient - from office365.onedrive.driveitems.driveItem import DriveItem - from office365.onedrive.drives.drive import Drive - from office365.onedrive.permissions.permission import Permission - from office365.onedrive.sites.site import Site - from 
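# _eml_for_record above rebuilds a minimal RFC 822 message from Salesforce's
# EmailMessage JSON via string.Template; the same idea in miniature:
from email.utils import formatdate
from string import Template

EML = Template("Date: $date\nSubject: $subject\n\n$body\n")
message = EML.substitute(date=formatdate(), subject="Status", body="All good.")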
office365.sharepoint.client_context import ClientContext - from office365.sharepoint.files.file import File - from office365.sharepoint.folders.folder import Folder - from office365.sharepoint.publishing.pages.page import SitePage - -CONNECTOR_TYPE = "sharepoint" - -MAX_MB_SIZE = 512_000_000 - -# TODO handle other data types possible from Sharepoint -# exampled: https://github.com/vgrem/Office365-REST-Python-Client/tree/master/examples/sharepoint - - -class SharepointContentType(Enum): - DOCUMENT = "document" - SITEPAGE = "site_page" - LIST = "list" - - -@dataclass -class SharepointAccessConfig(AccessConfig): - client_cred: str - - -@dataclass -class SharepointPermissionsConfig(EnhancedDataClassJsonMixin): - permissions_application_id: str - permissions_tenant: str - permissions_client_cred: str = enhanced_field(sensitive=True) - authority_url: Optional[str] = field(repr=False, default="https://login.microsoftonline.com") - - -@dataclass -class SharepointConnectionConfig(ConnectionConfig): - client_id: str - site: str - access_config: SharepointAccessConfig = enhanced_field(sensitive=True) - permissions_config: Optional[SharepointPermissionsConfig] = None - - @requires_dependencies(["office365"], extras="sharepoint") - def get_client(self) -> "ClientContext": - from office365.runtime.auth.client_credential import ClientCredential - from office365.sharepoint.client_context import ClientContext - - try: - credentials = ClientCredential(self.client_id, self.access_config.client_cred) - site_client = ClientContext(self.site).with_credentials(credentials) - except Exception as e: - logger.error(f"Couldn't set Sharepoint client: {e}") - raise e - return site_client - - @requires_dependencies(["msal"], extras="sharepoint") - def get_permissions_token(self): - from msal import ConfidentialClientApplication - - try: - app = ConfidentialClientApplication( - authority=f"{self.permissions_config.authority_url}/" - f"{self.permissions_config.permissions_tenant}", - client_id=self.permissions_config.permissions_application_id, - client_credential=self.permissions_config.permissions_client_cred, - ) - token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"]) - except ValueError as exc: - logger.error("Couldn't set up credentials for Sharepoint") - raise exc - if "error" in token: - raise SourceConnectionNetworkError( - "failed to fetch token, {}: {}".format(token["error"], token["error_description"]) - ) - return token - - @requires_dependencies(["office365"], extras="sharepoint") - def get_permissions_client(self) -> Optional["GraphClient"]: - from office365.graph_client import GraphClient - - if self.permissions_config is None: - return None - - client = GraphClient(self.get_permissions_token) - return client - - -@dataclass -class SharepointIndexerConfig(IndexerConfig): - path: Optional[str] = None - recursive: bool = False - omit_files: bool = False - omit_pages: bool = False - omit_lists: bool = False - - -@dataclass -class SharepointIndexer(Indexer): - connection_config: SharepointConnectionConfig - index_config: SharepointIndexerConfig = field(default_factory=lambda: SharepointIndexerConfig()) - - def list_files(self, folder: "Folder", recursive: bool = False) -> list["File"]: - if not recursive: - folder.expand(["Files"]).get().execute_query() - return folder.files - - folder.expand(["Files", "Folders"]).get().execute_query() - files: list["File"] = list(folder.files) - folders: list["Folder"] = list(folder.folders) - for f in folders: - if "/Forms" in 
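# get_client above authenticates a SharePoint ClientContext with app credentials;
# a hedged sketch (site URL and secrets illustrative):
from office365.runtime.auth.client_credential import ClientCredential
from office365.sharepoint.client_context import ClientContext

site_client = ClientContext("https://example.sharepoint.com/sites/demo").with_credentials(
    ClientCredential("client-id", "client-secret")
)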
f.serverRelativeUrl: - continue - files.extend(self.list_files(f, recursive)) - return files - - def get_properties(self, raw_properties: dict) -> dict: - raw_properties = {k: v for k, v in raw_properties.items() if v} - filtered_properties = {} - for k, v in raw_properties.items(): - try: - json.dumps(v) - filtered_properties[k] = v - except TypeError: - pass - return filtered_properties - - def list_pages(self, client: "ClientContext") -> list["SitePage"]: - pages = client.site_pages.pages.get().execute_query() - return pages - - def page_to_file_data(self, site_page: "SitePage") -> FileData: - site_page.expand(site_page.properties.keys()).get().execute_query() - version = site_page.properties.get("Version", None) - unique_id = site_page.properties.get("UniqueId", None) - modified_date = site_page.properties.get("Modified", None) - url = site_page.properties.get("AbsoluteUrl", None) - date_modified_dt = parse_datetime(modified_date) if modified_date else None - date_created_at = ( - parse_datetime(site_page.first_published) - if (site_page.first_published and site_page.first_published != "0001-01-01T08:00:00Z") - else None - ) - file_path = site_page.get_property("Url", "") - server_path = file_path if file_path[0] != "/" else file_path[1:] - additional_metadata = self.get_properties(raw_properties=site_page.properties) - additional_metadata["sharepoint_content_type"] = SharepointContentType.SITEPAGE.value - return FileData( - identifier=unique_id, - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - filename=site_page.file_name, - fullpath=file_path, - rel_path=file_path.replace(self.index_config.path, ""), - ), - metadata=DataSourceMetadata( - url=url, - version=version, - date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None, - date_created=str(date_created_at.timestamp()) if date_created_at else None, - date_processed=str(time()), - record_locator={ - "server_path": server_path, - }, - ), - additional_metadata=additional_metadata, - ) - - def file_to_file_data(self, client: "ClientContext", file: "File") -> FileData: - file.expand(file.properties.keys()).get().execute_query() - absolute_url = f"{client.base_url}{quote(file.serverRelativeUrl)}" - date_modified_dt = ( - parse_datetime(file.time_last_modified) if file.time_last_modified else None - ) - - date_created_at = parse_datetime(file.time_created) if file.time_created else None - additional_metadata = self.get_properties(raw_properties=file.properties) - additional_metadata["sharepoint_content_type"] = SharepointContentType.DOCUMENT.value - fullpath = str(file.serverRelativeUrl) - rel_path = fullpath.replace(self.index_config.path, "") - while rel_path[0] == "/": - rel_path = rel_path[1:] - return FileData( - identifier=file.unique_id, - connector_type=CONNECTOR_TYPE, - source_identifiers=SourceIdentifiers( - filename=file.name, - fullpath=fullpath, - rel_path=rel_path, - ), - metadata=DataSourceMetadata( - url=absolute_url, - version=f"{file.major_version}.{file.minor_version}", - date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None, - date_created=str(date_created_at.timestamp()) if date_created_at else None, - date_processed=str(time()), - record_locator={"server_path": file.serverRelativeUrl, "site_url": client.base_url}, - ), - additional_metadata=additional_metadata, - ) - - def get_root(self, client: "ClientContext") -> "Folder": - if path := self.index_config.path: - return client.web.get_folder_by_server_relative_path(path) - default_document_library 
= client.web.default_document_library() - root_folder = default_document_library.root_folder - root_folder = root_folder.get().execute_query() - self.index_config.path = root_folder.name - return root_folder - - def get_site_url(self, client: "ClientContext") -> str: - res = client.web.get().execute_query() - return res.url - - def get_site(self, permissions_client: "GraphClient", site_url) -> "Site": - return permissions_client.sites.get_by_url(url=site_url).execute_query() - - def get_permissions_items(self, site: "Site") -> list["DriveItem"]: - # TODO find a way to narrow this search down by name of drive - items: list["DriveItem"] = [] - drives: list["Drive"] = site.drives.get_all().execute_query() - for drive in drives: - items.extend(drive.root.children.get_all().execute_query()) - return items - - def map_permission(self, permission: "Permission") -> dict: - return { - "id": permission.id, - "roles": list(permission.roles), - "share_id": permission.share_id, - "has_password": permission.has_password, - "link": permission.link.to_json(), - "granted_to_identities": permission.granted_to_identities.to_json(), - "granted_to": permission.granted_to.to_json(), - "granted_to_v2": permission.granted_to_v2.to_json(), - "granted_to_identities_v2": permission.granted_to_identities_v2.to_json(), - "invitation": permission.invitation.to_json(), - } - - def enrich_permissions_on_files(self, all_file_data: list[FileData], site_url: str) -> None: - logger.debug("Enriching permissions on files") - permission_client = self.connection_config.get_permissions_client() - if permission_client is None: - return - site = self.get_site(permissions_client=permission_client, site_url=site_url) - existing_items = self.get_permissions_items(site=site) - for file_data in all_file_data: - etag = file_data.additional_metadata.get("ETag") - if not etag: - continue - matching_items = list(filter(lambda x: x.etag == etag, existing_items)) - if not matching_items: - continue - if len(matching_items) > 1: - logger.warning( - "Found multiple drive items with etag matching {}, skipping: {}".format( - etag, ", ".join([i.name for i in matching_items]) - ) - ) - continue - matching_item = matching_items[0] - permissions: list["Permission"] = matching_item.permissions.get_all().execute_query() - permissions_data = [ - self.map_permission(permission=permission) for permission in permissions - ] - file_data.metadata.permissions_data = permissions_data - - @property - def process_permissions(self) -> bool: - return ( - self.connection_config.permissions_config.permissions_tenant - and self.connection_config.permissions_config.permissions_client_cred - and self.connection_config.permissions_config.permissions_application_id - ) - - def run(self, **kwargs: Any) -> Generator[FileData, None, None]: - client = self.connection_config.get_client() - root_folder = self.get_root(client=client) - logger.debug(f"processing content from path: {self.index_config.path}") - if not self.index_config.omit_files: - files = self.list_files(root_folder, recursive=self.index_config.recursive) - file_data = [self.file_to_file_data(file=file, client=client) for file in files] - if self.process_permissions: - self.enrich_permissions_on_files( - all_file_data=file_data, site_url=self.get_site_url(client=client) - ) - for file in file_data: - yield file - if not self.index_config.omit_pages: - pages = self.list_pages(client=client) - for page in pages: - file_data = self.page_to_file_data(site_page=page) - file_data.metadata.record_locator["site_url"] = 
client.base_url
-                yield file_data
-
-
-@dataclass
-class SharepointDownloaderConfig(DownloaderConfig):
-    pass
-
-
-@dataclass
-class SharepointDownloader(Downloader):
-    connection_config: SharepointConnectionConfig
-    download_config: SharepointDownloaderConfig
-    connector_type: str = CONNECTOR_TYPE
-
-    def get_download_path(self, file_data: FileData) -> Path:
-        content_type = file_data.additional_metadata.get("sharepoint_content_type")
-        rel_path = file_data.source_identifiers.fullpath
-        rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
-        download_path = self.download_dir / Path(rel_path)
-        if content_type == SharepointContentType.SITEPAGE.value:
-            # Update output extension to html if site page
-            download_path = download_path.with_suffix(".html")
-        return download_path
-
-    def get_document(self, file_data: FileData) -> DownloadResponse:
-        client: "ClientContext" = self.connection_config.get_client()
-        file: "File" = client.web.get_file_by_id(unique_id=file_data.identifier)
-        download_path = self.get_download_path(file_data=file_data)
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.debug(
-            f"writing document content {file_data.source_identifiers.fullpath} to {download_path}"
-        )
-        with download_path.open("wb") as f:
-            file.download(f).execute_query()
-        return self.generate_download_response(file_data=file_data, download_path=download_path)
-
-    def get_site_page(self, file_data: FileData) -> DownloadResponse:
-        # TODO fetch comments for site page as well
-        from lxml import etree, html
-
-        canvas_content_raw = file_data.additional_metadata.get("CanvasContent1")
-        layout_web_parts_content_raw = file_data.additional_metadata.get("LayoutWebpartsContent")
-        html_content = []
-        if layout_web_parts_content_raw:
-            layout_web_parts_content = json.loads(layout_web_parts_content_raw)
-            for web_part in layout_web_parts_content:
-                properties = web_part.get("properties", {})
-                if title := properties.get("title"):
-                    html_content.append(f"<h1>{title}</h1>")
-        if canvas_content_raw:
-            canvas_content = json.loads(canvas_content_raw)
-            for content in canvas_content:
-                if inner_html := content.get("innerHTML"):
-                    html_content.append(inner_html)
-        htmls = "".join(html_content)
-        content = f"<div>{htmls}</div>"
-        document = html.fromstring(content)
-        download_path = self.get_download_path(file_data=file_data)
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.debug(
-            f"writing site page content {file_data.source_identifiers.filename} to {download_path}"
-        )
-        with download_path.open("w") as f:
-            f.write(etree.tostring(document, encoding="unicode", pretty_print=True))
-        return self.generate_download_response(file_data=file_data, download_path=download_path)
-
-    def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
-        content_type = file_data.additional_metadata.get("sharepoint_content_type")
-        if not content_type:
-            raise ValueError(
-                f"Missing sharepoint_content_type metadata: {file_data.additional_metadata}"
-            )
-        if content_type == SharepointContentType.DOCUMENT.value:
-            return self.get_document(file_data=file_data)
-        elif content_type == SharepointContentType.SITEPAGE.value:
-            return self.get_site_page(file_data=file_data)
-
-
-sharepoint_source_entry = SourceRegistryEntry(
-    connection_config=SharepointConnectionConfig,
-    indexer_config=SharepointIndexerConfig,
-    indexer=SharepointIndexer,
-    downloader_config=SharepointDownloaderConfig,
-    downloader=SharepointDownloader,
-)
diff --git a/unstructured/ingest/v2/processes/connectors/singlestore.py b/unstructured/ingest/v2/processes/connectors/singlestore.py
deleted file mode 100644
index 3e2d534e2..000000000
--- a/unstructured/ingest/v2/processes/connectors/singlestore.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import json
-from dataclasses import dataclass
-from datetime import date, datetime
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional
-
-import numpy as np
-import pandas as pd
-from dateutil import parser
-
-from unstructured.ingest.enhanced_dataclass import enhanced_field
-from unstructured.ingest.utils.data_prep import batch_generator
-from unstructured.ingest.utils.table import convert_to_pandas_dataframe
-from unstructured.ingest.v2.interfaces import (
-    AccessConfig,
-    ConnectionConfig,
-    FileData,
-    UploadContent,
-    Uploader,
-    UploaderConfig,
-    UploadStager,
-    UploadStagerConfig,
-)
-from unstructured.ingest.v2.logger import logger
-from unstructured.ingest.v2.processes.connector_registry import (
-    DestinationRegistryEntry,
-)
-from unstructured.utils import requires_dependencies
-
-if TYPE_CHECKING:
-    from singlestoredb.connection import Connection
-
-CONNECTOR_TYPE = "singlestore"
-
-
-@dataclass
-class SingleStoreAccessConfig(AccessConfig):
-    password: Optional[str] = None
-
-
-@dataclass
-class SingleStoreConnectionConfig(ConnectionConfig):
-    host: Optional[str] = None
-    port: Optional[int] = None
-    user: Optional[str] = None
-    database: Optional[str] = None
-    access_config: SingleStoreAccessConfig = enhanced_field(sensitive=True)
-
-    @requires_dependencies(["singlestoredb"], extras="singlestore")
-    def get_connection(self) -> "Connection":
-        import singlestoredb as s2
-
-        conn = s2.connect(
-            host=self.host,
-            port=self.port,
-            database=self.database,
-            user=self.user,
-            password=self.access_config.password,
-        )
-        return conn
-
-
-@dataclass
-class SingleStoreUploadStagerConfig(UploadStagerConfig):
-    drop_empty_cols: bool = False
-
-
-@dataclass
-class SingleStoreUploadStager(UploadStager):
-    upload_stager_config: SingleStoreUploadStagerConfig
-
-    @staticmethod
-    def parse_date_string(date_string: str) -> date:
-        try:
-            timestamp = float(date_string)
-            return datetime.fromtimestamp(timestamp)
-        except Exception as e:
-            logger.debug(f"date {date_string} string not a 
timestamp: {e}") - return parser.parse(date_string) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - output_path = Path(output_dir) / Path(f"{output_filename}.csv") - output_path.parent.mkdir(parents=True, exist_ok=True) - - df = convert_to_pandas_dataframe( - elements_dict=elements_contents, - drop_empty_cols=self.upload_stager_config.drop_empty_cols, - ) - datetime_columns = [ - "data_source_date_created", - "data_source_date_modified", - "data_source_date_processed", - ] - for column in filter(lambda x: x in df.columns, datetime_columns): - df[column] = df[column].apply(self.parse_date_string) - if "data_source_record_locator" in df.columns: - df["data_source_record_locator"] = df["data_source_record_locator"].apply( - lambda x: json.dumps(x) if x else None - ) - - with output_path.open("w") as output_file: - df.to_csv(output_file, index=False) - return output_path - - -@dataclass -class SingleStoreUploaderConfig(UploaderConfig): - table_name: str - batch_size: int = 100 - - -@dataclass -class SingleStoreUploader(Uploader): - connection_config: SingleStoreConnectionConfig - upload_config: SingleStoreUploaderConfig - connector_type: str = CONNECTOR_TYPE - - def upload_csv(self, content: UploadContent) -> None: - df = pd.read_csv(content.path) - logger.debug( - f"uploading {len(df)} entries to {self.connection_config.database} " - f"db in table {self.upload_config.table_name}" - ) - stmt = "INSERT INTO {} ({}) VALUES ({})".format( - self.upload_config.table_name, - ", ".join(df.columns), - ", ".join(["%s"] * len(df.columns)), - ) - logger.debug(f"sql statement: {stmt}") - df.replace({np.nan: None}, inplace=True) - data_as_tuples = list(df.itertuples(index=False, name=None)) - with self.connection_config.get_connection() as conn: - with conn.cursor() as cur: - for chunk in batch_generator( - data_as_tuples, batch_size=self.upload_config.batch_size - ): - cur.executemany(stmt, chunk) - conn.commit() - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - self.upload_csv(content=content) - - -singlestore_destination_entry = DestinationRegistryEntry( - connection_config=SingleStoreConnectionConfig, - uploader=SingleStoreUploader, - uploader_config=SingleStoreUploaderConfig, - upload_stager=SingleStoreUploadStager, - upload_stager_config=SingleStoreUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/sql.py b/unstructured/ingest/v2/processes/connectors/sql.py deleted file mode 100644 index cfec183a1..000000000 --- a/unstructured/ingest/v2/processes/connectors/sql.py +++ /dev/null @@ -1,265 +0,0 @@ -import enum -import json -import uuid -from dataclasses import dataclass, field -from datetime import date, datetime -from pathlib import Path -from typing import Any, Optional, Union - -import numpy as np -import pandas as pd -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import DestinationRegistryEntry -from unstructured.utils import requires_dependencies - -CONNECTOR_TYPE = "sql" 
-ELEMENTS_TABLE_NAME = "elements" - - -@dataclass -class SQLAccessConfig(AccessConfig): - username: Optional[str] = None - password: Optional[str] = None - - -class DatabaseType(str, enum.Enum): - SQLITE = "sqlite" - POSTGRESQL = "postgresql" - - -@dataclass -class SimpleSqlConfig(ConnectionConfig): - db_type: DatabaseType = ( - # required default value here because of parent class - DatabaseType.SQLITE - ) - database: Optional[str] = None - host: Optional[str] = None - port: Optional[int] = 5432 - access_config: Optional[SQLAccessConfig] = enhanced_field(default=None, sensitive=True) - connector_type: str = CONNECTOR_TYPE - - def __post_init__(self): - if (self.db_type == DatabaseType.SQLITE) and (self.database is None): - raise ValueError( - "A sqlite connection requires a path to a *.db file " - "through the `database` argument" - ) - - -@dataclass -class SQLUploadStagerConfig(UploadStagerConfig): - pass - - -_COLUMNS = ( - "id", - "element_id", - "text", - "embeddings", - "type", - "system", - "layout_width", - "layout_height", - "points", - "url", - "version", - "date_created", - "date_modified", - "date_processed", - "permissions_data", - "record_locator", - "category_depth", - "parent_id", - "attached_filename", - "filetype", - "last_modified", - "file_directory", - "filename", - "languages", - "page_number", - "links", - "page_name", - "link_urls", - "link_texts", - "sent_from", - "sent_to", - "subject", - "section", - "header_footer_type", - "emphasized_text_contents", - "emphasized_text_tags", - "text_as_html", - "detection_class_prob", -) - -_DATE_COLUMNS = ("date_created", "date_modified", "date_processed", "last_modified") - - -def parse_date_string(date_value: Union[str, int]) -> date: - try: - timestamp = float(date_value) / 1000 if isinstance(date_value, int) else float(date_value) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_value} string not a timestamp: {e}") - return parser.parse(date_value) - - -@dataclass -class SQLUploadStager(UploadStager): - upload_stager_config: SQLUploadStagerConfig = field( - default_factory=lambda: SQLUploadStagerConfig() - ) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - output_path = Path(output_dir) / Path(f"{output_filename}.json") - output_path.parent.mkdir(parents=True, exist_ok=True) - - output = [] - for data in elements_contents: - metadata: dict[str, Any] = data.pop("metadata", {}) - data_source = metadata.pop("data_source", {}) - coordinates = metadata.pop("coordinates", {}) - - data.update(metadata) - data.update(data_source) - data.update(coordinates) - - data["id"] = str(uuid.uuid4()) - - # remove extraneous, not supported columns - [data.pop(column) for column in data if column not in _COLUMNS] - - output.append(data) - - df = pd.DataFrame.from_dict(output) - for column in filter(lambda x: x in df.columns, _DATE_COLUMNS): - df[column] = df[column].apply(parse_date_string) - for column in filter( - lambda x: x in df.columns, - ("permissions_data", "record_locator", "points", "links"), - ): - df[column] = df[column].apply( - lambda x: json.dumps(x) if isinstance(x, (list, dict)) else None - ) - for column in filter(lambda x: x in df.columns, ("version", "page_number")): - df[column] = df[column].apply(str) - - with output_path.open("w") as output_file: - df.to_json(output_file, 
orient="records", lines=True) - return output_path - - -@dataclass -class SQLUploaderConfig(UploaderConfig): - batch_size: int = 50 - - -@dataclass -class SQLUploader(Uploader): - connector_type: str = CONNECTOR_TYPE - upload_config: SQLUploaderConfig - connection_config: SimpleSqlConfig - - @property - def connection(self): - if self.connection_config.db_type == DatabaseType.POSTGRESQL: - return self._make_psycopg_connection - elif self.connection_config.db_type == DatabaseType.SQLITE: - return self._make_sqlite_connection - raise ValueError(f"Unsupported database {self.connection_config.db_type} connection.") - - def _make_sqlite_connection(self): - from sqlite3 import connect - - return connect(database=self.connection_config.database) - - @requires_dependencies(["psycopg2"], extras="postgres") - def _make_psycopg_connection(self): - from psycopg2 import connect - - return connect( - user=self.connection_config.access_config.username, - password=self.connection_config.access_config.password, - dbname=self.connection_config.database, - host=self.connection_config.host, - port=self.connection_config.port, - ) - - def prepare_data( - self, columns: list[str], data: tuple[tuple[Any, ...], ...] - ) -> list[tuple[Any, ...]]: - output = [] - for row in data: - parsed = [] - for column_name, value in zip(columns, row): - if self.connection_config.db_type == DatabaseType.SQLITE and isinstance( - value, (list, dict) - ): - value = json.dumps(value) - if column_name in _DATE_COLUMNS: - if value is None: - parsed.append(None) - else: - parsed.append(parse_date_string(value)) - else: - parsed.append(value) - output.append(tuple(parsed)) - return output - - def upload_contents(self, content: UploadContent) -> None: - df = pd.read_json(content.path, orient="records", lines=True) - logger.debug(f"uploading {len(df)} entries to {self.connection_config.database} ") - df.replace({np.nan: None}, inplace=True) - - columns = tuple(df.columns) - stmt = f"INSERT INTO {ELEMENTS_TABLE_NAME} ({','.join(columns)}) \ - VALUES({','.join(['?' 
if self.connection_config.db_type==DatabaseType.SQLITE else '%s' for x in columns])})" # noqa E501 - - for rows in pd.read_json( - content.path, orient="records", lines=True, chunksize=self.upload_config.batch_size - ): - with self.connection() as conn: - values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None))) - if self.connection_config.db_type == DatabaseType.SQLITE: - conn.executemany(stmt, values) - else: - with conn.cursor() as cur: - cur.executemany(stmt, values) - - conn.commit() - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - for content in contents: - self.upload_contents(content=content) - - -sql_destination_entry = DestinationRegistryEntry( - connection_config=SimpleSqlConfig, - uploader=SQLUploader, - uploader_config=SQLUploaderConfig, - upload_stager=SQLUploadStager, - upload_stager_config=SQLUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/connectors/utils.py b/unstructured/ingest/v2/processes/connectors/utils.py deleted file mode 100644 index 6e6a8e5fc..000000000 --- a/unstructured/ingest/v2/processes/connectors/utils.py +++ /dev/null @@ -1,19 +0,0 @@ -from datetime import datetime -from typing import Union - -from dateutil import parser - - -def parse_datetime(date_value: Union[int, str, float, datetime]) -> datetime: - if isinstance(date_value, datetime): - return date_value - elif isinstance(date_value, float): - return datetime.fromtimestamp(date_value) - elif isinstance(date_value, int): - return datetime.fromtimestamp(date_value / 1000) - - try: - timestamp = float(date_value) - return datetime.fromtimestamp(timestamp) - except ValueError: - return parser.parse(date_value) diff --git a/unstructured/ingest/v2/processes/connectors/weaviate.py b/unstructured/ingest/v2/processes/connectors/weaviate.py deleted file mode 100644 index 67a6c024c..000000000 --- a/unstructured/ingest/v2/processes/connectors/weaviate.py +++ /dev/null @@ -1,232 +0,0 @@ -import json -from dataclasses import dataclass, field -from datetime import date, datetime -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from dateutil import parser - -from unstructured.ingest.enhanced_dataclass import enhanced_field -from unstructured.ingest.v2.interfaces import ( - AccessConfig, - ConnectionConfig, - FileData, - UploadContent, - Uploader, - UploaderConfig, - UploadStager, - UploadStagerConfig, -) -from unstructured.ingest.v2.logger import logger -from unstructured.ingest.v2.processes.connector_registry import ( - DestinationRegistryEntry, -) -from unstructured.utils import requires_dependencies - -if TYPE_CHECKING: - from weaviate import Client - -CONNECTOR_TYPE = "weaviate" - - -@dataclass -class WeaviateAccessConfig(AccessConfig): - access_token: Optional[str] = None - api_key: Optional[str] = None - client_secret: Optional[str] = None - password: Optional[str] = None - - -@dataclass -class WeaviateConnectionConfig(ConnectionConfig): - host_url: str - class_name: str - access_config: WeaviateAccessConfig = enhanced_field(sensitive=True) - username: Optional[str] = None - anonymous: bool = False - scope: Optional[list[str]] = None - refresh_token: Optional[str] = None - connector_type: str = CONNECTOR_TYPE - - -@dataclass -class WeaviateUploadStagerConfig(UploadStagerConfig): - pass - - -@dataclass -class WeaviateUploadStager(UploadStager): - upload_stager_config: WeaviateUploadStagerConfig = field( - default_factory=lambda: WeaviateUploadStagerConfig() - ) - - @staticmethod - def 
parse_date_string(date_string: str) -> date: - try: - timestamp = float(date_string) - return datetime.fromtimestamp(timestamp) - except Exception as e: - logger.debug(f"date {date_string} string not a timestamp: {e}") - return parser.parse(date_string) - - @classmethod - def conform_dict(cls, data: dict) -> None: - """ - Updates the element dictionary to conform to the Weaviate schema - """ - - # Dict as string formatting - if record_locator := data.get("metadata", {}).get("data_source", {}).get("record_locator"): - # Explicit casting otherwise fails schema type checking - data["metadata"]["data_source"]["record_locator"] = str(json.dumps(record_locator)) - - # Array of items as string formatting - if points := data.get("metadata", {}).get("coordinates", {}).get("points"): - data["metadata"]["coordinates"]["points"] = str(json.dumps(points)) - - if links := data.get("metadata", {}).get("links", {}): - data["metadata"]["links"] = str(json.dumps(links)) - - if permissions_data := ( - data.get("metadata", {}).get("data_source", {}).get("permissions_data") - ): - data["metadata"]["data_source"]["permissions_data"] = json.dumps(permissions_data) - - # Datetime formatting - if date_created := data.get("metadata", {}).get("data_source", {}).get("date_created"): - data["metadata"]["data_source"]["date_created"] = cls.parse_date_string( - date_created - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_modified := data.get("metadata", {}).get("data_source", {}).get("date_modified"): - data["metadata"]["data_source"]["date_modified"] = cls.parse_date_string( - date_modified - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if date_processed := data.get("metadata", {}).get("data_source", {}).get("date_processed"): - data["metadata"]["data_source"]["date_processed"] = cls.parse_date_string( - date_processed - ).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - if last_modified := data.get("metadata", {}).get("last_modified"): - data["metadata"]["last_modified"] = cls.parse_date_string(last_modified).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ", - ) - - # String casting - if version := data.get("metadata", {}).get("data_source", {}).get("version"): - data["metadata"]["data_source"]["version"] = str(version) - - if page_number := data.get("metadata", {}).get("page_number"): - data["metadata"]["page_number"] = str(page_number) - - def run( - self, - elements_filepath: Path, - file_data: FileData, - output_dir: Path, - output_filename: str, - **kwargs: Any, - ) -> Path: - with open(elements_filepath) as elements_file: - elements_contents = json.load(elements_file) - for element in elements_contents: - self.conform_dict(data=element) - output_path = Path(output_dir) / Path(f"{output_filename}.json") - with open(output_path, "w") as output_file: - json.dump(elements_contents, output_file) - return output_path - - -@dataclass -class WeaviateUploaderConfig(UploaderConfig): - batch_size: int = 100 - - -@dataclass -class WeaviateUploader(Uploader): - upload_config: WeaviateUploaderConfig - connection_config: WeaviateConnectionConfig - client: Optional["Client"] = field(init=False) - connector_type: str = CONNECTOR_TYPE - - @requires_dependencies(["weaviate"], extras="weaviate") - def __post_init__(self): - from weaviate import Client - - auth = self._resolve_auth_method() - self.client = Client(url=self.connection_config.host_url, auth_client_secret=auth) - - @requires_dependencies(["weaviate"], extras="weaviate") - def _resolve_auth_method(self): - access_configs = self.connection_config.access_config - 
connection_config = self.connection_config - if connection_config.anonymous: - return None - - if access_configs.access_token: - from weaviate.auth import AuthBearerToken - - return AuthBearerToken( - access_token=access_configs.access_token, - refresh_token=connection_config.refresh_token, - ) - elif access_configs.api_key: - from weaviate.auth import AuthApiKey - - return AuthApiKey(api_key=access_configs.api_key) - elif access_configs.client_secret: - from weaviate.auth import AuthClientCredentials - - return AuthClientCredentials( - client_secret=access_configs.client_secret, scope=connection_config.scope - ) - elif connection_config.username and access_configs.password: - from weaviate.auth import AuthClientPassword - - return AuthClientPassword( - username=connection_config.username, - password=access_configs.password, - scope=connection_config.scope, - ) - return None - - def run(self, contents: list[UploadContent], **kwargs: Any) -> None: - # TODO update to use async support in weaviate client - # once the version can be bumped to include it - elements_dict = [] - for content in contents: - with open(content.path) as elements_file: - elements = json.load(elements_file) - elements_dict.extend(elements) - - logger.info( - f"writing {len(elements_dict)} objects to destination " - f"class {self.connection_config.class_name} " - f"at {self.connection_config.host_url}", - ) - - self.client.batch.configure(batch_size=self.upload_config.batch_size) - with self.client.batch as b: - for e in elements_dict: - vector = e.pop("embeddings", None) - b.add_data_object( - e, - self.connection_config.class_name, - vector=vector, - ) - - -weaviate_destination_entry = DestinationRegistryEntry( - connection_config=WeaviateConnectionConfig, - uploader=WeaviateUploader, - uploader_config=WeaviateUploaderConfig, - upload_stager=WeaviateUploadStager, - upload_stager_config=WeaviateUploadStagerConfig, -) diff --git a/unstructured/ingest/v2/processes/embedder.py b/unstructured/ingest/v2/processes/embedder.py deleted file mode 100644 index 6ed1c560c..000000000 --- a/unstructured/ingest/v2/processes/embedder.py +++ /dev/null @@ -1,82 +0,0 @@ -from abc import ABC -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Optional - -from unstructured.documents.elements import Element -from unstructured.embed.interfaces import BaseEmbeddingEncoder -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin, enhanced_field -from unstructured.ingest.v2.interfaces.process import BaseProcess -from unstructured.staging.base import elements_from_json - - -@dataclass -class EmbedderConfig(EnhancedDataClassJsonMixin): - embedding_provider: Optional[str] = None - embedding_api_key: Optional[str] = enhanced_field(default=None, sensitive=True) - embedding_model_name: Optional[str] = None - embedding_aws_access_key_id: Optional[str] = None - embedding_aws_secret_access_key: Optional[str] = None - embedding_aws_region: Optional[str] = None - - def get_embedder(self) -> BaseEmbeddingEncoder: - kwargs: dict[str, Any] = {} - if self.embedding_api_key: - kwargs["api_key"] = self.embedding_api_key - if self.embedding_model_name: - kwargs["model_name"] = self.embedding_model_name - # TODO make this more dynamic to map to encoder configs - if self.embedding_provider == "langchain-openai": - from unstructured.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder - - return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs)) - elif self.embedding_provider == 
"langchain-huggingface": - from unstructured.embed.huggingface import ( - HuggingFaceEmbeddingConfig, - HuggingFaceEmbeddingEncoder, - ) - - return HuggingFaceEmbeddingEncoder(config=HuggingFaceEmbeddingConfig(**kwargs)) - elif self.embedding_provider == "octoai": - from unstructured.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder - - return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs)) - elif self.embedding_provider == "langchain-aws-bedrock": - from unstructured.embed.bedrock import BedrockEmbeddingConfig, BedrockEmbeddingEncoder - - return BedrockEmbeddingEncoder( - config=BedrockEmbeddingConfig( - aws_access_key_id=self.embedding_aws_access_key_id, - aws_secret_access_key=self.embedding_aws_secret_access_key, - region_name=self.embedding_aws_region, - ) - ) - elif self.embedding_provider == "langchain-vertexai": - from unstructured.embed.vertexai import ( - VertexAIEmbeddingConfig, - VertexAIEmbeddingEncoder, - ) - - return VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(**kwargs)) - elif self.embedding_provider == "mixedbread-ai": - from unstructured.embed.mixedbreadai import ( - MixedbreadAIEmbeddingConfig, - MixedbreadAIEmbeddingEncoder, - ) - - return MixedbreadAIEmbeddingEncoder(config=MixedbreadAIEmbeddingConfig(**kwargs)) - else: - raise ValueError(f"{self.embedding_provider} not a recognized encoder") - - -@dataclass -class Embedder(BaseProcess, ABC): - config: EmbedderConfig - - def run(self, elements_filepath: Path, **kwargs: Any) -> list[Element]: - # TODO update base embedder classes to support async - embedder = self.config.get_embedder() - elements = elements_from_json(filename=str(elements_filepath)) - if not elements: - return elements - return embedder.embed_documents(elements=elements) diff --git a/unstructured/ingest/v2/processes/partitioner.py b/unstructured/ingest/v2/processes/partitioner.py deleted file mode 100644 index 71bcd5700..000000000 --- a/unstructured/ingest/v2/processes/partitioner.py +++ /dev/null @@ -1,165 +0,0 @@ -import asyncio -from abc import ABC -from dataclasses import dataclass, field, fields -from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional - -from unstructured.documents.elements import DataSourceMetadata -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.enhanced_dataclass.dataclasses import enhanced_field -from unstructured.ingest.v2.interfaces.process import BaseProcess -from unstructured.ingest.v2.logger import logger -from unstructured.staging.base import elements_to_dicts, flatten_dict - -if TYPE_CHECKING: - from unstructured_client import UnstructuredClient - from unstructured_client.models.shared import PartitionParameters - - -@dataclass -class PartitionerConfig(EnhancedDataClassJsonMixin): - strategy: str = "auto" - ocr_languages: Optional[list[str]] = None - encoding: Optional[str] = None - additional_partition_args: Optional[dict[str, Any]] = None - skip_infer_table_types: Optional[list[str]] = None - fields_include: list[str] = field( - default_factory=lambda: ["element_id", "text", "type", "metadata", "embeddings"], - ) - flatten_metadata: bool = False - metadata_exclude: list[str] = field(default_factory=list) - metadata_include: list[str] = field(default_factory=list) - partition_endpoint: Optional[str] = "https://api.unstructured.io/general/v0/general" - partition_by_api: bool = False - api_key: Optional[str] = enhanced_field(default=None, sensitive=True) - hi_res_model_name: Optional[str] = None - - def 
__post_init__(self): - if self.metadata_exclude and self.metadata_include: - raise ValueError( - "metadata_exclude and metadata_include are " - "mutually exclusive with each other. Cannot specify both." - ) - - def to_partition_kwargs(self) -> dict[str, Any]: - partition_kwargs: dict[str, Any] = { - "strategy": self.strategy, - "languages": self.ocr_languages, - "hi_res_model_name": self.hi_res_model_name, - "skip_infer_table_types": self.skip_infer_table_types, - } - # Don't inject information if None and allow default values in method to be used - partition_kwargs = {k: v for k, v in partition_kwargs.items() if v is not None} - if self.additional_partition_args: - partition_kwargs.update(self.additional_partition_args) - return partition_kwargs - - -@dataclass -class Partitioner(BaseProcess, ABC): - config: PartitionerConfig - - def is_async(self) -> bool: - return self.config.partition_by_api - - def postprocess(self, elements: list[dict]) -> list[dict]: - element_dicts = [e.copy() for e in elements] - for elem in element_dicts: - if self.config.metadata_exclude: - ex_list = self.config.metadata_exclude - for ex in ex_list: - if "." in ex: # handle nested fields - nested_fields = ex.split(".") - current_elem = elem - for f in nested_fields[:-1]: - if f in current_elem: - current_elem = current_elem[f] - field_to_exclude = nested_fields[-1] - if field_to_exclude in current_elem: - current_elem.pop(field_to_exclude, None) - else: # handle top-level fields - elem["metadata"].pop(ex, None) # type: ignore[attr-defined] - elif self.config.metadata_include: - in_list = self.config.metadata_include - for k in list(elem["metadata"].keys()): # type: ignore[attr-defined] - if k not in in_list: - elem["metadata"].pop(k, None) # type: ignore[attr-defined] - in_list = self.config.fields_include - elem = {k: v for k, v in elem.items() if k in in_list} - - if self.config.flatten_metadata and "metadata" in elem: - metadata = elem.pop("metadata") - elem.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"])) - return element_dicts - - def partition_locally( - self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs - ) -> list[dict]: - from unstructured.partition.auto import partition - - logger.debug(f"Using local partition with kwargs: {self.config.to_partition_kwargs()}") - logger.debug(f"partitioning file {filename} with metadata {metadata.to_dict()}") - elements = partition( - filename=str(filename.resolve()), - data_source_metadata=metadata, - **self.config.to_partition_kwargs(), - ) - return self.postprocess(elements=elements_to_dicts(elements)) - - async def call_api(self, client: "UnstructuredClient", request: "PartitionParameters"): - # TODO when client supports async, run without using run_in_executor - # isolate the IO heavy call - loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, client.general.partition, request) - - def create_partition_parameters(self, filename: Path) -> "PartitionParameters": - from unstructured_client.models.shared import Files, PartitionParameters - - partition_request = self.config.to_partition_kwargs() - possible_fields = [f.name for f in fields(PartitionParameters)] - filtered_partition_request = { - k: v for k, v in partition_request.items() if k in possible_fields - } - if len(filtered_partition_request) != len(partition_request): - logger.debug( - "Following fields were omitted due to not being " - "supported by the currently used unstructured client: {}".format( - ", ".join([v for v in 
partition_request if v not in filtered_partition_request]) - ) - ) - logger.debug(f"Using hosted partitioner with kwargs: {partition_request}") - with open(filename, "rb") as f: - files = Files( - content=f.read(), - file_name=str(filename.resolve()), - ) - filtered_partition_request["files"] = files - partition_params = PartitionParameters(**filtered_partition_request) - return partition_params - - async def partition_via_api( - self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs - ) -> list[dict]: - from unstructured_client import UnstructuredClient - - logger.debug(f"partitioning file {filename} with metadata: {metadata.to_dict()}") - client = UnstructuredClient( - server_url=self.config.partition_endpoint, api_key_auth=self.config.api_key - ) - partition_params = self.create_partition_parameters(filename=filename) - resp = await self.call_api(client=client, request=partition_params) - elements = resp.elements or [] - # Append the data source metadata the auto partition does for you - for element in elements: - element["metadata"]["data_source"] = metadata.to_dict() - return self.postprocess(elements=elements) - - def run( - self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs - ) -> list[dict]: - return self.partition_locally(filename, metadata=metadata, **kwargs) - - async def run_async( - self, filename: Path, metadata: Optional[DataSourceMetadata] = None, **kwargs - ) -> list[dict]: - return await self.partition_via_api(filename, metadata=metadata, **kwargs) diff --git a/unstructured/ingest/v2/processes/uncompress.py b/unstructured/ingest/v2/processes/uncompress.py deleted file mode 100644 index e0b826461..000000000 --- a/unstructured/ingest/v2/processes/uncompress.py +++ /dev/null @@ -1,43 +0,0 @@ -from abc import ABC -from copy import copy -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any - -from unstructured.ingest.enhanced_dataclass import EnhancedDataClassJsonMixin -from unstructured.ingest.utils.compression import TAR_FILE_EXT, ZIP_FILE_EXT, uncompress_file -from unstructured.ingest.v2.interfaces import FileData -from unstructured.ingest.v2.interfaces.process import BaseProcess - - -@dataclass -class UncompressConfig(EnhancedDataClassJsonMixin): - pass - - -@dataclass -class Uncompressor(BaseProcess, ABC): - config: UncompressConfig = field(default_factory=UncompressConfig) - - def is_async(self) -> bool: - return True - - def run(self, file_data: FileData, **kwargs: Any) -> list[FileData]: - local_filepath = Path(file_data.source_identifiers.fullpath) - if local_filepath.suffix not in TAR_FILE_EXT + ZIP_FILE_EXT: - return [file_data] - new_path = uncompress_file(filename=str(local_filepath)) - new_files = [i for i in Path(new_path).rglob("*") if i.is_file()] - responses = [] - for f in new_files: - new_file_data = copy(file_data) - new_file_data.source_identifiers.fullpath = str(f) - if new_file_data.source_identifiers.rel_path: - new_file_data.source_identifiers.rel_path = str(f).replace( - str(local_filepath.parent), "" - )[1:] - responses.append(new_file_data) - return responses - - async def run_async(self, file_data: FileData, **kwargs: Any) -> list[FileData]: - return self.run(file_data=file_data, **kwargs) diff --git a/unstructured/utils.py b/unstructured/utils.py index 03632e37a..523fcd4a0 100644 --- a/unstructured/utils.py +++ b/unstructured/utils.py @@ -10,7 +10,6 @@ import platform import subprocess import tempfile import threading -from datetime import datetime from 
functools import wraps from itertools import combinations from typing import ( @@ -238,36 +237,6 @@ def dependency_exists(dependency: str): return True -def validate_date_args(date: Optional[str] = None) -> bool: - """Validate whether the provided date string satisfies any of the supported date formats. - - Used by unstructured/ingest/connector/biomed.py - - Returns `True` if the date string satisfies any of the supported formats, otherwise raises - `ValueError`. - - Supported Date Formats: - - 'YYYY-MM-DD' - - 'YYYY-MM-DDTHH:MM:SS' - - 'YYYY-MM-DD+HH:MM:SS' - - 'YYYY-MM-DDTHH:MM:SS±HHMM' - """ - if not date: - raise ValueError("The argument date is None.") - - for format in DATE_FORMATS: - try: - datetime.strptime(date, format) - return True - except ValueError: - pass - - raise ValueError( - f"The argument {date} does not satisfy the format:" - f" YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SS±HHMM", - ) - - def _first_and_remaining_iterator(it: Iterable[_T]) -> Tuple[_T, Iterator[_T]]: iterator = iter(it) try:
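
For reference, the validate_date_args helper removed above can be run standalone; this sketch reconstructs DATE_FORMATS from the docstring's listed formats, since the actual constant is defined elsewhere in unstructured/utils.py:

# Standalone sketch of the removed validator; DATE_FORMATS is an
# assumption derived from the docstring, not the original constant.
from datetime import datetime
from typing import Optional

DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%d+%H:%M:%S", "%Y-%m-%dT%H:%M:%S%z")


def validate_date_args(date: Optional[str] = None) -> bool:
    if not date:
        raise ValueError("The argument date is None.")
    for fmt in DATE_FORMATS:
        try:
            datetime.strptime(date, fmt)
            return True
        except ValueError:
            pass
    raise ValueError(
        f"The argument {date} does not satisfy the format:"
        f" YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS or YYYY-MM-DD+HH:MM:SS or YYYY-MM-DDTHH:MM:SS±HHMM",
    )


assert validate_date_args("2024-10-15T11:01:34")
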

zx0#k6h;xD(pO~R$$14q+cvj5y%OGh{eFMh%?&Aj*T8gKckUzR_={Ic+v)dG?4yZE< zirielRrL1~T)$G#34 zmeaN0P?~)qHQEUGxQ^XGs1q84JlCam5BiMQiQ12*<}xuSh~*-xiWEG_`Zj~#u=eJl(1+ZC+0 z_CPIw6u2Yx*4y#?9kSV1@zSbxK*499-vR|FR8Y$~^i^;)H_H}B%r;1s`0^dnhSAWR z8OIi(dL&nW>5-f`$ZiF{7Dfhl-9Tah1k_O~ngZE|Hw~yV@Ty1^r*wmGbh_yI5}}Ef zKpF^MLET~@u-42zUcvSYN^xngQrgjPJ8 zRt{5+(w)0e|)3Q&kUmHY%XznKR zT$D=s0$0@+-n#_>B{{K?eq8q65gg`xX!;NJSF0nl?9Krga{<=}@KBdWwW(fl(bhA< zV|4?eSYhThnLLRNV|B_XuZg~UMQU6M zF<%*isal~Jkqc3WHy(LTgsXH}%HU-2Gtmz^BHR-jP{f-p<(txyDt#)Y<&IrQLxR;O zNS}~-5z}1gJ~!Dju0(_+jLJDl>`%@HW_n@8NQ-1qo)YfZe?EYIy`l@YYdMOY9c zksF`~>%$ChZFf)286Q>^WvaA-Ai%IHFpiGVcESpy{_#|S+P_qBn36utfw z&J)cmMU)$>bwBK9i1>9^MEc8BG$@_R18wNW>_?Lh!f+R$z4T=SozECZi|v|53hen1 zB08`r3=}rc>ye%#fG7&*5j0Sekx zG&5R}6Cc9u2Wx#dHVaKXoPjbw0Oj-0!sD|`_bZ)Yo`+KQnKSE=iYvP@RS;;E4eeDH z?)qo(qx6g#^2r{kO;{K2>~`{!XpDsLk+Z2pE@4^6J4$d4iSAA|PY%KQq;3(eKk9B+ zV=VR_Sx#iK-)x#wY~V=%0C+|J0{7nb*n=}Jmt*#H!kMqA7$vZMnMROu`T6I{Tnjt7 zrqoi8#wbU!n$Abx6X80r_)EYAPE-^OOiVuYDK8EB1h3pt)@sCjkC<#SGkXXQw8dAs zYiKzAPKB@Ng!Bw2@#&+cv$m{IhclITkn`ybR1Z(^9s{G_qMYGYuMbO_z4fJAOXC;= zI}NvHnAb3H2%U4UZ7)dXm1gei#1f^F0p<<6PN6}_g{-cs$-=5RU@2=jTX2l3`BMgObU@_k@FBHo zmrl~_PnEhFI+a?_@xoK>SZl|%M}rTL%RfQ-oXWHZ+~c_S+apzc#=cN^=*wM*LY^vz zsE-`k&64;K;{Mq&3XM&(*DF3w{gG$=?w9SuDoXr&>oY!>i{{u?x}hUU`>a7ow?~yi z&qVYwg1zUv25q0V!NTzv-m6kO?{h2;R1kjop4~@PnMJ1){#<82dpQ&Kf)=;7=B`o5 z9s0tH=QCbt%I0cmh}TQk%q>_si?mEH$cjk~an|XMi3+Kg;bf4}MzFAzeJ`67ZAdWp z0%+^M(T-GtmccyDOyXHm^cG~P5c_nw@L_~~_78>bJ@{)d>Ec~6%m)>3U zI)1&nDd@Kml3K3x2oKj*B`RhTRh$c#_dzfgJH~hT`@sZ;F79l9ub!R<2s-DUG{?+B zPKIYJ{8jw?>$q*W>ud&Y)Yy#)m>9Vteh4`B`>8V$~H%4Ou-N5cX$t(f(apt4!JR$Q#cEUs&Ciz0aSZva*qAR6`G?5#@c%UF`GbVGn0RMge_iF>U0ASq@U`vCPn|02-|^>R z#fP@^N8ai}sG-Z$r|XtMClQ#oYq(z`R((}LVu4hfZz5txqsCn#u%k^9p>cPb*(?OW z`@;J>#TOsv2erWyX8QV|VbCa-=g92dZ8f%*v!n89LXC9kA$u44ro6vJm#iJD=xtv9 zzx+-2^+&GC?8(tPH-Zu0!_OqaABB}5Rqza@h z+oQSh0JH)4)2i;Tk^qH9o^s13@sZO3s6qZW=;I1s<1GGXAJ_eWZ`*g~-r53abpIS~ zZWh>Hse@t%wqSrB)en^@0BavaiQd+X6A6M7Y6;q4R2%^o>&Sg3ugTV>wM^QqX}kB`~n-@?R@`cL7-lo1^?TNS2m7+$1(Yv zF=-(DZ=`>xCgPL{#aZgG?V_X3ZC7v%UG9q%?j!S8{8p+z$JBYx=@e~tnp2fMP(|cqcMsEsn`rW z$?fR6pr>$Z|CqWpO(QN1X-NyX++);W^KI|i4r7s@(*zu$EuCD>TC4pHm_xiQ%Wi&d zwy*~9!hi5w9lQVO3n~h>aYO=j;yZtM$xUx$Z*?Rm?_n!K)oB@7lX2=Cg<&^G*lhrl@dK;urv+FOd7rTk1zE|;fmE3o>!rmX_ zeCyU^Htova6J2fis_^;<3a|Bc76ukLS5*U0ca^12y-!fTrW^nQ2$20tVT8i)iA ze#i?(fn7}k>mzqeUrQd^03ixukd~tPC;Z(1e6|1FHF*60R0a5d^FVSG9v0vVfDDI@`p8OEdCN(<#uuTXTkMRbzcH6sT_+4c`J7FI=ptf6KvzT_wI+C zRIEZa7G`5ki64XDj9~(`*v{nVAD|YK!2YHd`)1!pyNI;~j9VI{U(WDJY=?mhr(e9f zLw0ut3o22w--oBcxd?@)?U;71zu2_#=1-`wuix^GoJ8XnC&SIjO0E9FLo|zQAv!$?}O^=zXsJ8{Tx((7;e~*@xRoq|6CHFEu;Z+e#D_r zvw|09KyXk&Q-_UEGruq3;r2Km}qIA>qc?y-<~gHcAZ=T z+XyF9og221O~1=j_o)O^(NhGMmC3Eq{2M#TFL?s@l)bX4kTcjF*!%8LNDGV`TZ@Ah zSyacU%vOg%^f@PzUQFw5SZ;%LZs{^B1&SdbK@ zTfe)v8?Hyqp`dO3lh8n$_-slZ$l8k#Ccib6=DD~+S5Z@># z98D?AvXkkF200e1A;Wy&nIJ%t!JX5}k#Ix-A7LYk3N4FvVlB<3l%jY~3osSwDE&ap z+ou@v%)DCZK{*U^X(iOcJWmU9*O%97a~vG<0D_=_1i+VJ1X$m{Q}BIU4V$*HiYZvu zH}ai`E6nPFfx&=)nZ1}NuxkoX67oQZ+Dtre0LnNWfSP&SMgi05Ki$$^0IYx#VTh6+ zK#XQ{>mba`2IScG?0@pQ0EI%zkn4{7Yad~louFKC6&^dIkSFqJ0kWoc`=gM0aGV#j z;VUC)h6kt=vP8{K#sFzz04W`WoO5rfjf;hBs3nEEWMwm)pQqftML`Eq=D;6dP-u~|H?gO$!gwZG{w+c7Qj7eii zK@sau6HlKby_PtUqQI<%X}qWC*}D8}>nC&hbW0zF;A^f0eOoB7R!x`*LvC44&l5=F znDUPGsQrGf-9ZkueM$;JnWE!<>$Xcs+dD5071kvEgnOu{)(C<+F+7V#gy%goPEP=1 z*)x{`8M=k#pt2cgb}yrnJff5L6_AWfY%ob1N!@Yn!Wf;k=4W<|1MsOXGve>{F>3`t zU(A`?gXc%O#;DOitO_kG)-Y>m>$`XkO`!ZFbd~n;fY4~E2=0_&;4z9fc(F(PYV@f) z=it>N`b~Wq64MQhk6wwJF_ajFnO&@Of0Nsp;3E(#GTaoe7A%G1GQ4TH@Gx7w03l0S 
zs(6JJ>z2x+v4;O-xz*HXOI9bsxSO%W#1+=BdQHCu}26w@wez29e5$0_u$dU}ZN8d(>$7SHe_|!ux|%fATI> z7|adE2lhdw-^1-AwZHM57jCg3pMjE@6yH;z9!=LIGZSha$E4B9t#2ROcMi4SX_fh! z@!9%~d#a_D$CjyAuLqSStDP1QLcgwg3sJ);Kc$r1HlrN%Pz~OTdub@cuTwt-v^2x< zN20TD<}DWLsm$#Nn7qboW=p-x^ujhddL9iH$9t++2AKpq4i>PsQvkg)kuNEqMkq5K zL+XoG%-Rz*AG|KFSpSig{Q9lYyf2E3eTEaX07=ozB$6bJR|m|!4+$WsMpIs;8esImGmO7(Cvc_9^>&UEYl@K@u-wuX0NbTCo}6D%uBvJ3M-cO(0~6L~d;t}R7k8wSiX{Ub{AN@8SOT@jFpde_*pi2*}O)!r1oME|xzAJ2dR zq_{xD3?i&iv~IJ4lAG8Ajv+Och2E2~PIBL~H>ZHy<1yCWal914HRexzs9(9SvAEha z=wPr#tj~agnk!pA^=3TLzpk3>5^ZdwgIO;dUJ~Tf|0G28h>`;H9E<~ggsajr!;{qP zW$=L(R+Yv$jN4mZKwct@#h^7XM%edG3&Lu|_>Gy)kMzAyn$CyBbmg>dFYjR-obp|A z)6y=)q&^}l*ApJnoL;{J%_f8VER^b>XrVB=r>S=W^x`Zu3bopUF@=$!y~zi|UP8Yzu!^iFJ1|x2`EmBq zR10l_^SQkT)fb*(H>a{L!)^Syy2dGPj(15?l-uDG%TLO3??rXq5)C1RP-ez_jzDb2 zdP#1`>D7le(EjDs*{dYJhvIc}*Amk$_|L!S6j8UT-#gMEjNR z`Axhj>am}p@4`6A13xniDl(#@e)Z98AeLCxxkM>?Ja7meBf^MWRHW`5h()5e;j${i z&V(FcUroLlRa&oezbRh*F;Uj=%{kE6v3J4%DTXV$DZb^Iw27ia?PfJO%y{-e%Ny^u z-$H_zmKBwf^2rO@fkL>;M;TMdzrYYEJ7<~U0|mJAsLnc>yC&tr&GfU@8b?~#>+V&NRlx-iQHwPV|8H}*yFY##7VzlflP$LoS_^!*9^}?bCj4j0??&B3=#28&Wvy@f42?}Yx_zgb2irC;2_n4V@B=S0tcy+skU=wFdq~e z;)d*Hl$GLpvMNv=4rn=fsg-}Ol=#K>yHPu^H1&SOp?6tHF0L8E%W?uAbDe1()n6!w zSxsPl36Ixi2wHqN$gWLpA*=<+RL_B69rCqP%N$DZdQ^=m+;dYq=~9=ob9 zlofQ~^1O%v4Z9`Pr%Ceoaoqlo%@QGaQGIruA6AeVB(h4>+3dt z-A3geDUKtlORg>ZUh3$Z!TV06A&mpUj$IjyO>}ct7190m?C#TI`9JGBrUn5j)nG$C_$4 zE!L@iH}DXP0~se{7kQ6uog)bmmuDPHz=hZu<`v;`KN z8QeF33TZ3XyxX>IUWDb7Yjoso#x{89uoo+M4WANKug~a`b!8`~YJJdWgw22gW>oBz zvUKo%^Qxq9X%lzjoyRN9jsyhnBD<$@-G(ycYRjqH$666XIFmP6Q4}h)rW?kGOm4i7 zmt$ZL*s4VoG9R5Kg`J@sXY-`$W%6JSgDPjb2~792a-|z2F)rwt#Wy432$X<&w(#RJpVR#i>AKt}olUV{xh^A~L)Dr5ECBmWtbA z`}ErU{Y!W@^lJTteJ<&d9KC_O{4}c*bPywIgH1U}=00q)-QdS{nujSx4Po(cmU+he zsY@6u>9|(6Qj1dS;%A>!d4>cD3HMo07`NTj@Q?9qTffKlDYT;|yqt6ZvwN2}%CWMb zYa-n=XNDAuZ6lEz%yL}u?QU*lGlvxz>lP*cZbKQuo5S%8_z_CM0_Gy3eqV0UR0RMY zJfA76&AyV#kRco)1fR>%axEZFCukN48aUOsn~M}M@`*py^TZ1a3rBB#5JGgq$9cf1 zH_Az((%eQb!vW;!$$j3HR1*I)d`+%#rlr63@nCg=L`}MS|Fs=Oz4VG--y1!{IMAi{ z=@u>({nQ{`+PUf;njGYkxc?g(jD06IfvSiy(0xT)of^v&7-9=I_ZdE~xT!4w%DH;D z0;}su3C${drw5O|QSf+WQV>0jBtF{#JVuiDtL^zzX;<52H-uYN<8(0$jB*8H4oE5P%N&zjBi?U{mA(i%>1Y_?5H1Xzg>IuB5+Pzr{{Ge~6@6sCO*J@Do&rxp zHu^wL?eWE563r<|YQt~Dl>p2(F*E=QSoeU57ovwFwZ)g#>!cSPdqzBC0nA)MFUP8` zsLsl*5evD-Fxgpl``(*r!RK_MY#yjW_X3y|ih4dPUgMuxt` z>Sr$fcgUBK_Aj~g&|pvV*nSH&fZ8>EgD?Vz?B1p4@&}>wPe+!Eesz$#I7m*KxDiyO zXR%p`O3u4sbiG;>JYz!PV>nnW`fqq~AoBtVuK-)${S{kZMFyS18PF*VYiu{><%$Lu zTDH4C-e;dpHFBkG0^8xNc@*I^p4$U4k&NF`{4D?9G^uB@>}=~4zr0H2kpmyc; zL;z%H^yJsFtcT$u7XQ*-^Mrgo&w74&*F_k^K?J;afsJLQvUC#4o9pJ62LLf@YWKB2 z-3f$>D{%4oO06#@12!|CDF;%$QR~j#eDEu8ZUt`UR>Rp?*64wyu?`;2GJS;wzc3}f zE6Ykce}}vK8@1h)BXPY|G3O++1pl<-8C19U;Yy!GqeI_7svmERGnc7%J4JEL?0SOm zOMfbGV?mpnllAcXNqdFBo8>;Mhd=%B=V?1v^kE=#9e?`R?=S9SE^zZL^EzmDHaz%G zRRJBykuCO3L*DRU907q@P9J`~|Ia0aieh}H8%IzNbMuSKD_M(I>OvG)4Hu|%1x_n3 zKEr)9a`&sy^?OQ@d2rvBm)rPyFBM-}wkB%stcoYst)xwQ+3Esg>2jaE5N3Yiq9qe* z05R30!3#b506)3@t64bB4Hv1zRlgSep8hLM7lMMm*G>liV$#weR#Nob%T+G*cw@4x zf-R^;^q1G1JZND_lmFHL24kD)@xj61Ejm%_bglx$Chk3aO5%M^+V4&{;3u^Yf=M#+ zXZ;3{OrFAroZfrb?`vr-h@vVGu zJwNDQFzGjO8?B*#(QsTgYEX7GGJC4he|D(Ta1s|^r_^IP59Kf-X( z_6@7h|E%Kcw+FAQM;3!(5c29w>4wIy9#k7jR~H!Y!FZ8BwL-8Qc}9MVQn?BXkR(E> z=*7Sr=aqNhV0ZZ($GiL2IY0mTYGk>6KSvq=S9E&+xoiJIKl$qw|HaVGZ>o+z-3vJza19yu zW9ok;98dWFAK`dVmBHLYPQ?r6*PL*?+BF@XdkXL8eB8TOb$-SjHo_vm>BJ}2M(fGN z(2;C~$!qcG2Etv_6RBKKpg%-W8YeO>3y*(i&EWPp3^l6G5Pslo>Nr^FC(dNJ-91^t z{SwsZ0-PzZ1>^~})Z^}r%rx1RO>L*xRmc1dCBuI-anEbF^#b+mAIV++Hxi`($o>9% zm5g}tHs7jyyIw#iHP_;!c$j<%SBjnH2}q6{0f3+&5LIXPRsP%aSQOIkza+Ij)!(bn`qaLgCF!RutV 
zUNwwcm3iw{E#A%al8UtHzAd5Z9#F=;L&(9J1ayupQ=`BAi*w{}JHmiJ{S~((IMh7? z7!@}Z%wcX5j6-saeQQSnr_!AVEi;}YpJ5t5-2h#IPr30Y z-o=1R{xk+;@uZCpyS|(s4RCCM1!fmSIm{B>Dlegt&@TBTQ z$09JOXXcm0kgw~rN`^~oy}RKwObnR@cG#?Uiz57pdBW3?Q|3nO=;RD9qG5;`y}dWf zj@6tOWoIdq{W@>$*qOmel)**QfmE4?3MP&!RvF=5H##r2DvSAtL0E0*`OwEph{*Js zg4v_7@id%L?xa;Wuj1aJt|ru$yGMNZbb#HL^V_+&Jq`wn`JDTeu3b|2&P8DMw0_!UAOlV@& z3nMxY@JhxhxcSk{0CwpO(LsnfqH*vp7C=!L0OI^Pl)^OMjvSCDbLHBh0ha5fPzc>^ z|D<;)6>t6oHBy97r|*J7n*!*H9s`gl|02`CT@ehszMn@xOhZ8`x>g`?cA&8XO}ra) z1q@`)jhzM=2=&8@5Gsm~bwRNQ)ceeReb9h>I`5@zgn$AJBXj4jFoo`Q^CpZE%MYb+ zc`B-%LDe!wYOc@8Q2eAI(VF5t3j#S0U?vqI&pQK@O6}cMF^B-dArfWTyx;`?c!CIH zI{E}T93?r;?PH>VJ6~b)bSe)B; z3&|ZUw0qjj=)uIk3gXx^_yseQhH0`5CNDtBKpcRHg(qqR{{sZWip!q|izva1yn{tk zgT7?=w$8*&HCO0w47a6aLAnDzdA0lgjsEZ~O*mCxSJ^*?^Po6+r219F?Bqdk<1Q`( z*ZQ{C^|i_1TQ|J;2Vvv?B)padp9s({b{Z@!4G(dNYy*LnsBQoIq#L|mQXa{|61bVV zfh*W@3qn0!wIVryqj3ot%r-AUDn3u_GQ@LnfxNZR4?4jroVm+*;aGFI1yCvmZfT1Q z1@Mo7`#2rQBWtHV$8g=j_P*_cXKY+UwQ>?xQv!62$h7)=;#RhOkz#;(h)V=1S`KG~ z=gR`+R<*fCE%d-A;yM5W#X<>6lh&V*@ax4TP;@-tNW+kw9WBBztz*tNfu=-l2WA;= z3Pe61^f5}fa6|b~h`$?N2&0Mh4r>orS}ZiKckCz*8i2Vg0r~1tCPl0NG+WMC*vQNw zfyhdHVhhR*BXFKtU4v=GyXskE+SJ`12iCp;ga?0rPmxFA6Q5`+zQg$SK_TZ)C@azv zULg5dPm>P@ktqYIKa!SX31!8*oPwXz#F(+)U7^BG1aeJ*gcPxI<0#sf1n;skG#eY5?e*mUCR51O* zfHq*{LHoqnc9g-iC|WVssT*>jXF5@`4dvzPBTwJn-df$Z-XMTpEo;kq1X2O%{zwdV zbOMQamac{C+TA*78}zDQ8S5^ko}y;%lAkb7Z%#S>0gurZ^`Ed2JyWlotCa$(7ZA!F zqZvGQSxyqxBFCVBC>6*@QZ-EGOXiBX&jL&f1GW5P(>j5sd2eZTl2bl|QPK@%_NGI%QY-BB7p<=Yf{qqz9>iTxtG@_~A}cYql2UmKr#Rza;GBj0HM8-|NWSTgD}Z@mOw!i3j1|YB!To z&A20rJ%rBAN{)C}frv$>rp2rmBE&jfpUi={(}N_+YG8oG_VWgak=bWXSwwe!M`qcv zw8V;GTW(*s)6ZqB7F(v|@MpM6}lFXkk=`w`<@bqDG{BvdYsJ zgjs2h8Rq3qTWiLM5MS(PN}K6?ePdjymid*yiFU^+yH|V4IDm)@gO10(R$ z{ykmL?Uwf&D@NGxy@#Rf(yz7f6xqGZ!mfR|;y36?;BbCp?&C9x3(vA6viif7)aTf) z0V7H_QohN0cIDA3JDzrMdBXEL)e)_ELlFXtd=d0+!aQOe25EI!c5S{9u$n3Gy$%)X z;ep~)!9^*S!DV3syTN3&>QgVMUZ;BeKo9RfUXFC2Dia+S{f%`oyUEv*!T>G*Hsm0g z)wjccXu0U(y9^Y55D>%IWJ$f^=vfI8#r~8XLfOK*a=h##%F!iquPGpl+mPYF>h}7+-H%)@fF?K-IxeufmGtnw!eVLWS zdE?_K8CiCg?DE#8J^ik`ABp{bQu594Vq=X7oLWOiyU+YO68Uj?-RiLl7q-gz&(<_* z50qO27jGcF3f08%Ix0R35#B@)A~FhW;3P6vhG-}=Gvx4hkF=>E zCwYDB!M@7D^c2STWyYB<8x9&y`S$ajD`}mNHE$M> zsYJfY9FT)yW&R)b&O0uuY}?j?pjarN0tyJIw4xvwNJ?*{ zm(p;ngo#8hwi6(5eZZJ#Uw%iRKeT?w{dz-+@WW~?y)v>Ql?JUYBNWvxnKN+1tCb`Y zcPbH=B@!C5X8K|%a0KiABvGEZV_H%Vit5Ay5x&Eja8R>1qP4|KaY9U2j#kD{M`Kl0 z2bbf(-bji(%buF%+LNc(1Q+}|E^xuu{% zlgSs~6_c#2+e?^?bb3E_Z9$4x@r+xUE7(9LlxuL2utPF3)R^Fi^O%9)G-vMjlAtJR zgi5Yc-X7_HU2ZI-%H6^6)No8{ygvs|mZ5nsfJiDSq%xh0@A{j2Dr6HUXl0qW%TuM%2k0+jcb!Ut8eqT;ZIwDF9!6P!6 zX_k^@l8w6;ub0Q)5zPY{Y)8vTHCm<4(01@gJVhtXOIE^S9^87hTM}?}<^FY9q+?O} zzG3;nP+2`-#1}_(9@qzG)F7xS#P!=zB?Vcjb*7$uj}wXcutm%wArx{CyTsm!r_RFA zVR?^JSdVmTS&7!2S&yQgpA^))%=AX4=gLcd&q2ZrcQ^u>Ab)RiGAYSYy+Ba@&5z`~ zwlLC)kt&}M6!k&>VUUWWm@+2V>`aSnFx;J(N4?P8KRKixq4X}*Y8al`$cRyZUd`x* zP4!h{CkHFWRt%WuSV_Stq0MRtLBL@$m^~nAC!>h^M(6|=W`{B z^iOAS+Ly>@hFC@_4~nMeWB5yrwQpBLh23d8Yv-YiGNfHFw0t7GY^j=_lAZ(I{;BS{ zVT@%*4xaQlkLQ@F={7fL$P%%1M#LFM0ENxt)8H&tE@D!Br1lrBl|h+6OkQa2Rg+@A zU$%P8lDq!OyKjzJffktoh+v}jooSxm*ALt4vL%o|;Z|Wx7S!3u`~46Krainc@U($Z zG?&A8&zr71KPKv(^k6E!FrnjEN)4-9GMI{iE4OZJpN`&EY5*|WAcCrAt&QFmvKOFS zw{F0c#6HeL(Q&W>pR%hsxKi`N0$@wxS))2dp2c*(nl4!Np$9GyxRVD+NAu9Bm}091D`p%!?-8nNW>x4^$X^bBnYaL!$o?zfBa9#LI3^|Un(hZ}Pk+R@<`bhY`i z_;dUH)N7b~YT^PG=W&*nmMuC5Sg7C8*^aZbk@<9vY!7vanQBcEiC+c%lA}=c$5(YX z(!Bt$FE%CuI=y{jvlIPhAHpUcu>!v=Bg^^x-@$3mfP?NYCqdFgSJHz!W?hHg3RUxa z;7o3|5dV&U@2f2w-JI!I z-VQ(ewUbH{#l{p(Gy~<+xPCyFwPm?(N%bljF(5c7v#mvd7??yidzr!1VixIK;~9QT 
z^6@*WfH+VViQ|5P4Or61w}=j%ufA6;tOeJ|(NC#1;~A){W@(wD+q{`7=UTTom<#W~ z$88ajkYP2Nv}?Kp(Hx<^aHiUu$|rRbkTaHV9P6SkKhZ%sPAfj;IQzN z{w;qLeY^&+$KJ1-Nk2m%Ea$s~=&ftmXF5cOq74tkW+Fnjo(JDv#S-{bY`S=Bx58VY zterd{clygC|NNiJo&I=&YziQQUJ2+i*%waQv4C4W6{Go!_XFDZt~D3%`Q%%PW*5~J zeqIKU7YJZQkeAIBIm_l_<6XV|f96ZUaux=`v2 ztkN~sg+d@&$pY;V`gGKh)6uK${ugg7aiYx35%ER-d{;?13c7~NZDI^SB7)e`(31h; zBR~1U-L(Kg2d``OzuNTeyEIlOK7QmcZ)a#z?coxvF>V@WXk)i7(ooagM@;9Oy=Eww z9bIJCzj%>+Gw}3WGF?=vzK!QO1jK1pRja4$#>PSd7^fV9Wh0?IGBC#sY?Kid^F+G1B`Q#Sa5ufChW6WLJ{chj6ZIx?th&8=ZNHT4<-gXraL< zzAo3rWw-G8wih45MuJ#d?2;<4taf!h;&$ZT_5<_IWPg#8PY>3{z4;m(eKx znkgtC~ONG}aZp*_yj&ry##Zj$yNjfCXN1zPl3Eo{c&ru6KgDYUhUlUz7 z{q!06ezo;ve{I6p@_p2xwNnd+-RPDYi}OYpAG{3zC*y9_Jy3!uTvD)!OJ23CgNo7&ZAp zP4!_3&qLk^6t|7$$$!q=`6$gfYhI{`M&UipX|hm%qLJON$$ zX9&^nhqnJ2F;4%36RQ62xilY|aksP3K!LEccx*j%Fi>30DZwSC)Q*3H-S!XSs*hP_ zOP@)oT0hePEh$aEbX}0;|MrV!alm?G^5|JMHaN|pmK5cNBJ@t6eQ`GDI1e|pOm04y!_rfsC^_9hCQN- z-b*f|CwxNMH$fS!8++8=1`>dR1$cnOJO~|8lHR++G)%2c=hlAG0=(J~8d4@^*}?Pv zS?=BXKzfKh$3TjRi`1X7?cXcMVt%RsBw-H!^`*QsS?#nir1rz-23J~?=A3=U7+b8v-7JRdn*)hyriNV!kW5Qtlrr2qq=FQ7i$%l zaAGwbzJO=?SpZyHc>DRbzxhE|7Zn`-yjMrBIwPU;Bn4Mm*m(SP2mbuA@Kim5x?u?n`(**(b_2tl zearFC!;<9B|L&hR?e{JNl$FUE)pd*S1fB~X9r)$BVR)h3FV``I>y(X-H{a?FJpDP( z{n0PCez_&Wev3AFITO8uHM8BrCqbQ#zGbQx-FZl?LcjVamtI?p`%@`C>K=iryha6itjyW#n# zO2sx?HXOpK$?*w{LSJ6{p9>v;E-0c9Ni^Fdp^@%|s*NmEG?-N*O}kzc-ecOcdjvpW z;@P<{;&mFcF>$BsSk(whvmvmvcR7_`ZW~_PBj*NSoU$vDj36GWg)TxWgpJv@4%l1J z5c%`W&fu}yXS$FRD?X0p4T?!%ItmGj_kU`Kiw7r`RnW>h5tE4kh$g!754scyMl!pF zFLb>67>UZ-l3i!~p~eN}RA5sxBunmtoO2IZe(K9B83~ZM(&mJtg-pqc*VH6qvZ-8f z-T1g@7cuuFdew@h0OK+=u)br4~^}? z6rVe0DWZZMTG7R&)sL2i++~e9(G*DM_`s~>6|AASv>-`;b3s!7(vS7>*>3cDqb8mknrZe)(n9kjwVmduO`PziO=W8qa&WY_0lGy!+ zzOd{$eS)+Lc6{WehkTEhF7rKJ`j>#4mpk1*k5$@AKC-pXZTSe|eed->5bx6;0r9G= zrTGlvHTneNrT-LnYWBUj)7w83cltN)9EMu|6YO64d$4=w@4@c>u=oGZdet7ScAOk; zZlTRl;qTJn#5YBq0Au;2dJdHz$3R+jh=HXb%ts6jF_FWry+ zwf{;EboY;TM?U62BURGt4F}ZH+3^&jIjGAT_hy-op@3Iob&53YWJ5|BJP;`5qlcu8= zMkoMjPTFH&5_}khw9#J^(tci(by(ohW(rug;lk;d9sRv$sRSzsDHm&VxR&~D)oW#2 zR-*d~S=ztf*LSB~ik70yZa~N)t4+*rN$3E;`n4JU@0$&O3_>sbvK}t$EQ^U$kN{j( zIqPtJE$9?T3+UCH+bp|S=m+or7N~sWEeTG$oCk)}w!o4gyBGAqzu+_-SYa|${WW9B z=kE@H8t|9S!peUu?on)H5^b7nLtUG}tizHwQyUf`ZkY4#U;ld-)BE;IxP@XUG-zlBc^cQ3I-(E$%ZI3GZq>Tk6ZpsLNhry~BE0+Gu@07?pKfLPC? zE@h^Mp-G(ne(JA^P=Mr6-f~7;PfmhwKXj1;ygs)?J&dNxNSM8j8?+^6n8CBUn_fx4^8TMX=VN~x`qGa^QbeQ;SG~TWNE{J z0*`>aF%;kmb{AYn77YqY34=%lEwTCWN-i13T~dJ@JMu<8xz$l`{a@I$#Hv zn~6*^R?<#SU$WXPc{5tCtRKDFKFqTg9^Icj&afG5heua1eE8Q5+Xw2l4P@I`N$kH1lj2L~PwMLVN_ z)Bs4E+P?d6=2k#@)(v$9SQEf_fmf&3bXCdJ4#wB$C`;um9E5YvXlfsSzxnQtw*VLh z|JV>RPy;zzCc^)OkWDt|m-WFG8$?jiWD*^I53V|csUgZ3S50f~@L5l9m9*6x@`E|! 
zj!p!VJsSI}z1SMowNS?`O>y&sP3VQBL1)YZ8Bt#AZUczI6yDGPpT58%SPi=OkMYrM z-a~@rb#x< zMyBj7+SHC>wWpAzkdvQfAP|(36bR+(LE6sRa79LFWCx_n0FzL?b%$^UF!<6S-LZ1M zmC*z!^!|lu|1mG2@*Zn}_=ZT2U)y*&q7Pakgi%&5Ktm{`6SQ6rY~Z>r(z zCWvX?ssXQ1B;`csIgsWUe?%-rZrW@}3tS*17)8@Uv?E`-X)f;%d%W|&})b#<> z@AOKg2>e)$#F+R#yYVU&ABRiejT;FBhwNPZoeatxa?wFzfMP1Fcr%=GEQDAo<{|Z0 z!?5B_NtgqP*nh@7IuYfh&iT%SGd*JPm}SnW8XSZHX0QBZQtEkYVeLU-9T)iawL@)y zCFZ+{y~_ImgwdWn_ng)Mc|C@n)P@23qNqi3OQakR#GBEy*#Y{@b zl-1d65i83`>N`iRAG*d&$Kx)YqgL3_(zR~yp~v2aVh}=wV9l0Lmsljde=38*(OqQ^ zKn_M{nl8H!Qh!!_R)4~rR(1YjjC|^N2b{DdpjaQQa`%G+n{2BFR&{Q-*SPiZzeHX?jmNtGXSxQlo?IntZroCE0goA8xz8&V^uJ} z@m+h1GQ62+f5!Jac$~tUr>YM+=yF7q7CvhUO^j4x2?mgJIs9lYjVR=}AUG~f$zS+& z8TjZHvMLzDkbiF}r2O18ga!vf-f_F_u_+|wvM|%-#H{Auw9C!|#}bSfRr3sw>%zv4 zkIew$elGqoX{`ToD(~*~CtJXdZHHN0=-`!ECAqI)%blyP`QtM#qoLi$BAfc;(^4KN zWb17E3m~lo9gM3M<%O2$)jIHP1#P3?6Xdst^$%c;{Jy9c;qZR7-59jcA5O-baa4o) z(Ru-9t%WnyeT&Oar0%V~J&$%bMm%hq3+U7q+!od!o8--CD!P8MYOoZRgT~in?Zc`P z5`1?eIiqQ*CS%vc@eQO?2o1_j(Lk=RbiXWjzkaWJW~`}VicmM0IJ*kHIr|Il z$EEd19+z5}A9k5_9T`X>4+-YH{{?2TntCXKvgNqMxutM%fTh@BW1lLN7?WZ1trke{`1PLMit2u|hWBQW zyc!yPr%TmkM%`|@^WiKf@D^y_C-X$g1x$ESKhnMDekG$ii@`(51=#L0iIH;dxzlCV zbLGkK4a6_b?t&cH^5C*kc>3*uRm~6?M9}3Ht+slVBp-Ys3T7ODM5bL?eJu02#RhoA z@H?WW5wnj`X0F0h7F!TpmS*mS{P0$==}B*d`!7;+8(4We!87DxDW>h=!H7al>&30L zbWD6+OAz@aGa-Lf>FHR{wY5BG4A5;CTh_#TMM|QVd%NNHNOd$3knQpM2y(0~X*7Nk z3=I-xX{loH1mmbj0IYnC$Y>k)l=Q?IeB8^PeXN2fC9L|+&ybP3-oj2{urD3r#i#YK zSVd)68crgb%ph1h_wWU#!Z~F+F}RRz&neg3peabc>4ZXHLuA7_GD`avM7b2rohMSw zY6oAlFkgZj-Vhrq7m7^xM)zZZBe@<9#uLxQP$ywRz>1S{@A9>bGABjCWHMS=cA+}U$cT0) z=~zGb|2qZYn-?v(Z!h?47G^4Q)6o_0zzEiJ=1KUZ=#YaI(C(M@!qH;`a>=9@Fv>Ja z0k+`YE&LEC@MWdbb~#qd4toX>>R+$nmJUobYg<%!=Je@>|4Kh>7vfc@(RRU6^TR#2 z4Re(X#ANhpwbg-5hmre5g6umXgwPvhu$a7V&%ZbzBmeuajq(CirpNn zsll6Cs!or;50m$V$G0XzFN}X#lziAhkA>th*xHEELA(gByx7dE64=vyN36C!y@z!m z;Ow*OQs0QOBz`V73af#56YQuu6uWkj4S{6gN~IxKQKt?T&IC!ZC`rM2aNdTU zQfT1PT+M*d=es`(Jc&$j(w2~Qy1m?!p)|s7dAPbi^fJS!8>ODp44!?=tmQ*p$|EU9 ztNCW)_3}*Z*=y3xXG+E z$v4{|o#j%Gcfr*>5b}1}RcX9e$+WE*KH*n(!cEF!4@tdkU&*d^)4=nEJ6S{ZEL7ph z;oMC&y$xfw#%AwJC~=6AX*ZR*o6jqfBAkiuuNl~5C9RSvZJDE63Co>y^=SmH7zZ#Q z@+NZ+RN)?pcZ|XY_1-`Ve_up9ZVE;3prKl2T#-DtK`%Uz<*PglH0cKy{O!Bqgzn$8 zd=I;ngIH{ni!~k!&IiVY zzHro4bDc6hxIg%+>v^idoT#ZOgdC_+O1cxdpzeu#QU-7SdHOTp_{|Ag?iRo9Bpmb` zQA`f^(kMyZ+cQhNME7-7i#(TUD49V|?$)`ig+=>6> zCoKTE>4x80&>--ICU&nDDs90*n?Wn2xP2|%_E5@=m222L0Fb=#tkMcO@Ffhsil1W2 z)??Sy&aTe2u@boDkaUzL79s_8;#QBzh593pPu~Lrl$i(uyzdCE{-AGIYBF_rNHx&g zo3hiaXp`K1l7*T?ZR9-t1cI6Z0zmfVb}+PrChbmx$#~YepP=@;_4tmmAh_o5sY$M; zqB+U}h7F&G&(|JSL57Xj(@&eVq+ocz6IyqLMpl2OT>fQzDeGuV&bnh30Y6pwn7bw# zGS|STVdJ)zMq0wB#p%8j7d3*u!B~48oDme@jNru+4yoCU=%UMMod0>?+R`XdjeGK} zXosH#)1=o})+yA)a}}Wy?9h#&8;kab##)TlQ}42eiDpG}7e(P{3AkYT_E$STwAfF#EpP!W)F%#1%w71z$yZGRk+H#?JvKBt>NcLEbXbPT9C3`6kb za>mb>w@_wj;=6(RV`$zFe}_=$X}vv)JWPSzhcvZcJ|q*hyzP1J?9TIObmHLx6f-pN zE5&8Ls&@L(hg1)AMo-GFKmv84y`ctP(`h=^veQ?LdK%i5Wnyh>>4ev{20lu@QzEj~~Z8^V0y^|DJnVp#*{=dk_>sd}{CpO#>Dq=@{JuUAI83wc)~&Q|cDY zdoQK_xv!#@fAkrRyCNmzR$swuOx{nvNsPa-J47>X)0o0$Bj^wyKo3muoD?GP&PiIl`vkM z5;dHF@)`oKx~07{=91}i@`I4l^-Mbsw!pdn!i;){lc2Hq>g&s>sc35q>XQueNc!*CTG zuZ};g-R=D+q1%5|vmH)!0K>+|9D4c2& zXny`YbA%+Ipr&T<(VMZ;oBAB5mxMU7Avlq>I(cUl?-JB>gT`rNT-InY?un1qQ6E=4 zPsJ8k6(nA0r}M^q80)3~w%3^E(K=wp5CeCua1#W1-r(purM+$g{=h_r%sKM+b!Y>% z2_YNU4_r9&K@!hEKhiX?X9+KQ3NleuhKIr0^nuyT;)WnWkm-0{I@Nv2)RYHRW_xq< z@|6s+ue|)WQ>VZimaKXEnOQhuHrMOc{pN+zInf+R=8HL!&&Q3v+52NS@;!L2l)oE} z{AtFs6ps8KZ3~;*D0Grbk_7hc`;WGTnhpi&od1uu#ZcLIE%o1ob^k|3Mn!}0=%40m zfp}$91u2n6V%zJLG7<#rQe$@-#9lfqyFe~w#R>`b@P`E 
zIW%mUv8oEwkN&m1MEGk3FO4lfpXn{=5yNE&S{(k=#%R!v232~1Jf+-r9uGg^3PFJ< zAe<~A9lACvxLp9g8E@U3g9FSEgET?3rFmge%ntrq2EfkV;9(w;TzlQP1+rwDf*^%# z2taapNMagpglQHL?@b~LN<`@>JpPRpMRvk{VxqIAP<-vD|+#{ zg8F5{ei+6zBCaLEV?HFFYJ#xTz6IzsWdIc^4M8%MB?gn9voQ~G2$_00!Zh8c$Pk)@ zjWFJ?!|fS|9?*r`G7D+kszvVff<2J$%WGF}5!vZ&Fiz#NFcv}qs%Q%YCNGcykY}4- z&=I%A&J`omg3?|2ZY<7Zc|tM zcG(v?gIXxHP`w)Yp_sfGmS1S;e*P)XQRvYx&zNZ}Aqp|LskwfQ_i{HPec;8m>o^cb znU;4$v@^7JRG!}Kj*blOTN2#C^eGcMUb*rEm^%dM3`Fl8mriI_wnYjC)LWH+Mt}XL z(;zLff!=T8EZ~P)7jr}h4nS~)7^tg+C+g!QqaXy@F`>4J{YL5T;RA6OQ%PM}@AS_9 zc6u146+?|T1vwoDJPfh79~qOq&$mS#CGMo51v7DaG4|>`HE){Fi-Yj zcX<@Aeh5rYgcc6N`hCl&x6_XEBsMi;HlN?DhNq&Hg2v35E+^dXfoTx3q@htOf_FNv zJeIvEm^C7|sF>;8H_U84`*OE)>#;`$YzRv=1&WFc#9JMLmZlv_v7V1BXYnfK0vc#{ z8X&7?48&N<(Li#px%Ys}Ol8PY?ic84bQY(8Bk$L6u3*ijmSn}-TnoAVdtnHSM+Xa( z#Z1BQ$F|tL*N4_91<^Gj`wc7h%L}@t84A!PE9T+TLgq9KdSoCodai$|9tH;>9^Ovv z%*cm1oBO8Gls5NJR#%i>g_(pS2x43&M1i;(pm*W*HVTp`c*`tYM4nC2+7W<}&WiLA zy2P2Isu`XPQi}D1b^AH(8e0KX#gcoh3t`8mwI)!KmIRD(K7eTatp&8}iO5bsWVRt48qKrq6 z^-2C_RWHI@-Hj-`upH#b@Y?E4%>XveKEfs<+om`_cMXQM@4ENjF&)-J9jo~yEUKWaoM3;ye;+aS()3yWf#_4M*3M%bb};DAfGH6fxMTW)4ZSwd)qi(T-a`h~A2P=q(b%D=28+_MaL~%1I(ULIV~}9rXucX!q-8blj3WfA9t;a9FnZ~TWMR zSan~mJVnYuq2(5-$36CT+Jd4(iFCE|fnK?VnC5heQfuD3_M*(|>v>bs*GK33q4`Cs zhWRK$);?E_bM!NL@UFE4mbCnpTV$p?)-Q%Y$Jy?5k#~@#ls_548W-)@dII%fgXM^C zbzROd^1Kli&bPzAiIXMA-p}Z}%unEetYrtXnkNaFa*>!ZQ%|{I%xJOp!3q#Fyj^($ zFK8Y-0g3b7IxuZ-gxKdX0(mF>V#K*EwQ?-J@;>Mp(5icevEd!iK_nlVHiQVxEsSMy zD+fr#pwi%6ZClX-Y8ox0?xd?Y{h-~_LIb*%#Do9vn=j1|uBV*vq`roLwTrnd&oo@s zGEK{2;kj3Eb0!lk(}iWAFB$?~I*D-$07X5h%mS=Lr$86c&%j!ERly;Pwt;bu8?I6HVDGyhlAW#_-ZF$EA{iWG2?7aojYHuvRXx%_Xt2iNP2 z!E!z6c9>JQ2o3G0(EkwC)!o!Q_z}J}{XlnM+~dROKl}>Td%soYeI2M0)I00-?T2AO}IqtOK67npgqcQ=`_3jNhRa zrI*MLaqSCG5l!pe>z5%lL=>KIO^Hj4TycDvW~Q^HS>^fm!fCx~vqi_ja^E_7 zzAKKE%3{+&8WR`O`IrOXJ~mbf)Uo7M#Rt_NRL>0MlLU1M04-k-XG|2sMb5Z2-xSPv z1^ciPWD!ybhQ_C)RH=+8IroZl!g^k*GTik zoYnD2Uq2Zu=i|#m{SMk(FO5!XU6VBfz%C(zuz&@5Ue4Ovu-;w(rD@4|k}w|<(1v~m zt@4NSLrsoT3Z7$XrrtbNfwIn^^?Ar#2e0}ZiJ)5*M+WL@o?0p+**hV3p`HTB>4gW~ zm~DM2&Gp$QG6^-1HvAq9yPHK90IWNfs#6;lwAXQ>PjkmWG$*er6lkw)UgrlZa^mkZ z+Mq;3XT{Qbo=iGz$d?u=RAMzJF}qsv>Cv%!LIgz$5QUskh|kzE%}atY;IZckDZlp;&rnWM{euW>98f zcGkZ@R(4jehTqp7LDO4eA=`CN%7VG*z;rwJ z;{GW~tgVX9dod6CbXH>CCgLurcb*kL(DaosCL`S2R;FA~XU~b4deGTIC-khDWp^&C z%w+e6fMZ1{ayDrn3;z|SRBOJNQhn7{Vlr4_CS*(&Yr0s_ET^COi{;AK11JaQ(#dmZ zJ%<&e;qf9M;Kx(fYF`@$_1{>gG^B0XtE4)h4G3-vDd`C=K^m~qnJ9FgF|HwGA> z=IWif8w8UjN6Qba)?s#HLyDQF+^?wOaLJyL&-CV79lhB!W!8c<2jxs_SIB&36y)W)#N%IE z`)F!GK6vMDmv;w$Tgi3t)M$KkT}C~|u}^fs9=m^WZfqVGfhizPf=Vg~N*=-<%{$s| z)hXVl1;Ol*Z|o!XSccmi&AdsNc<#{{AHOWY2v3C#xvM#qEbWpXT*Lojj_I&G44`g_ z!}v}77$m?aJq~c>bRMuSzpe-h<%>`}j6~EZ`~2e*N5>b}KO}={iReSXS1~+* zBjMF%Di94P*dhgXt&vN113dSU7)QRI{mqcmnkxYt!~Eb&mK#;LAdKb^JZSgeCs)kW zW#E=>T)7^hzf+-YIcva&m#`aTJpkv0na~j2gn`SBpB>3rAp(XrbH0pYA)pR zheij9;2Xy3MY2F}D%}xHIfyU8&vT@rxY>7x4!zr~bsTqWiec(S^iW|tim1VZ`fT-t zCy8+cgL#-O&*>+t_n>%??pd@AidS*LR*;lv5iVkmt0!rA)`Xsl=zARWGpJ!@Blno= zDJB_{<5HV4TJ2Lns3#&BX}gvb!zkE6z|&rwC(@ss^@`1ioS0renQXYF%TtyeCIuE% zewte=K+u!~f~Lm)k<%_VDw3e1&xicIt70V4t*}b5g#E)+imxMTSvSb%B@ zSQONT$o%4BaL!T@$-(Kw<9C1vJQswX6xiO|mvG*4x1ntETEtT7^|z>asIk02P~FIr z1rRmSYxlBhC_p?pCR#3#UA+iIMf<8ZU0uV`#avp(K+8{(k1|x>QVP{bvxS|C$DX9q z0Gn-!TSx<`YLL4RNEWr}#H`n_D<0J4tP>3O_uPf&x1&{Q5Fg5(GF-HC@Fr+fXKAR% zsY|$9+MT3K8rl>~klBOOSr?RB<>Ec~_IUCweM`?wSh`f+CLzKh-3xI1u)5sR*+Lxz zhMESy`soC4Nc>1N!DYsw=FzyP52MAmI%DA5IWEz6$!*eEOCkXOf4@VHT zQqZ<*m!Z*;lr0ZL{>5a82pq-Gv4^!6Hq;;_5vq5}Adu$ZP_n&QLCWrz^7K{fx4+jb z1kP-UKyrm#y6W-$C&FHtJT$0jF3wu0i0enP&Vrlb9aWI0u$bE$WmEvOiTSuDl9s`; 
z`#$-^35oNj3E@TFz{<Sk%Ua*5z8lOgGk);wwV^6pR5cwT{CToy40X-giiAYuY=9z3#$lV9E zd5!+y!mrC>xeW+RXxKLwCz+g~IRT|RNfZC0b7NW17-U3b5VLlN40=ck?FJxa@Dk=z z|5Y56H{m93S>(edHfFduda%4xmiVa`0i!Sy;dKqg@g$`X$+zcI`ktRVw$DaNiT-W&oG@H9?-JTa zq~*=o_%X=;OAVDouN+tRuAniA^499-(b%uWtse}u>UG&?M#Y8T7o~X zBK&9-dEV&19$u}$^|ygqb^+w-#Si;}{f?ae@)GeKC6RI=GX;NKm^r7tV$1pbEVj$Oy=Mbx zv|V(d3X7t)k=@LN^gN`FAC+ZXk!p{{5@Y`3+Sd`H?gT6H12erdXa0Y7?SJ)weD@y_ z!HH)FNfTrH=a6#4USZ z(wGTJhV_fBOYQCVUS}z>#bmV)Ds} zggBW5?>|X+zxzvJkTt(v1#9HDci}((?nYQzEzK7lm zkrmj*DC*B~6#0SoNU6(jmI5VNZ*?F`(>(Ad-1EN^SERPq%_73DFgJH4!={sW*@Tp7&aN6^vxwm`AuCL1 zg^)?4XHVE5G=&`*kDb3T9`hq?1!8egFM3Ea*P zl%jQZPzP;=6Y$Pbb{`tt z?Fpjav7aL;W-R?kXQhLOFE{l|I6~_+dYf(Cyrf8G{5L3)=a-E`3oS!k<-p6uCF=kT z(u7jqC~%}hDz^Bg=R5jQ4o%~gRqGqC^9Jv)K8BjsLok^=r>%ef(9Ydp&YR6d(=#hb z>3ia3bfyA4%NZ~;ni>M3!v&Z5exsYK*UsA@z5}@geC!8a>B8uFuv~ifteDgLbG1ok z`ny2}K9^VwDoQ zV)t_*t&loaOV=CjIT%AZeG(&=?)7gV!#sv`zHZOJL|Vd%vvPvIYS~RipegQ$zUsLL zT8aplvF|H=RUaEPe?wwh3fRU5Q<@yGVrYnXrxQ%j?MnuFXx2R>aKY~DJOu7tPe+K5 zjC8oQS$$_9vX{1`ph6H11AE zKJ(YdTRXt%mfE=!nQw962R;WsVcy=~w?7 z-iY#@=j@?Pkp5{}Z8!j8sEJHoK00xr^#4YfI=gLP8gV^vlnj#28&I-}1hym1O^9CR zF^e}acE{p7lvBYWo`%|C2B7cZWZVAAhNR-R*rO=ZS8?&CK#DKofHdlu1&O@e&#tHF zC{o51YxA@5DGzsi)m16^sz)FSX_~7SkF?zIwx)3ug3;w(qjV_U zDlUfv@aknhVNdM~HmiGMqf9a%mb4nT6A;L_$zA%8EGW2a_ifqh=#5_cFzg-^sPV7w zQ#JhQ->^1y+Zi~a@M;(g_1di~Ww?}Ru^}I1ywiG_K#AyfPJIne`*Y;=!`)LBeMgmp zqzf`8&`!kn$E5}{c2X|3-@+BLnT;PjAtfV$WFmIZBbqf}CKZGd2CEh+=4NCBs%SRV zGuDvmW^{RfnK8b%BCcGd3yqo{z^Q$4rij}mtG_zt;o>X;mEe$uBV$_%L68D1_BnRw zR!ZmIJe`u%4K~E~ZZHhGKfu&fbK56q&b|lw#4Aq|kp+}ujjGGsLb$ojL*Ub#Xu{N3 zDjpd$~#EuhCpJKg-vhaN2;Hz4V5176U&A1 zq@gVS{aDE20Lp$#t+$Msp1R(*7q~PIMG)v0n~>`SPGC-CAfM*6>jwjB{5x3dMhp|O zMsGAz)FtXHxt<+>&_D{B#`cH9PXOQKurnyO zW<5!GK}3gGgAKBa?~FTMBOaN|>hYM=fK9Q9ZW^0ORq)nLZE6-zkYG0)A~I^`I1zj# z6K={Pe~3V{uQoJg$gk4+^>W4kwbvFo1W>Qv@bL3BVCqTa$#bksN7>cA58hy24n8g7 z!og_K79{U;8kzHunWk1vwQ^I-107Q~yW*XZ`#sW;RFRvt()%U~erq^&c}*j#RhBzt zuw;3$7-m7}GzE-uPKTAAr$A8piDU9%7t_tJAu zl4<7Tt}=qc*>u++Ruv5K*thZ9F!Sku9S zM=9?5ZW^?!SqiJu!$Tw7^XuQeyRu$or28(;D!w*GirH7=qPDSo^#YR2OjX%Oh$ELR z(kj>>lEvcKO{2N|#mI88AZ_vHV8@{Y+#9RML_AZve8CH9+^sOCo|ydRz!PyCAX%jK ziQe*;da664{0)q7W@b=Yuk>bd*f?bjt==a2i100&19jY+l@j=z!Dl6cxCmgR zZ1S&8j{xLGTJBuu?n|%I7A`_c;H7h#MMwtN+z;cx3H+P%Fq`ZJ{VGT0MQ>puKHNrm zOO*n?D)$iV+8HvpnY%RhPs504v^%=*ao!ZLD2C;G5Ke|MqWEOIrsSD883)F5P{!+1 z&$wXTnB^{@Uwpx0gEe6>4QOmzfyTC#2~;wn*LG1+$wS-w>W$XGVUY>PtIj25L++;7 ztEFJX{)GkT$)snCE(U*2Ec$$m+#KsF=ZQII;i*LXpy6DmX`8tJ!_EteT*PK7cI&Mj$dC_70! 
z(e5~mHw56uNuML8C*yne-ZG3?H~$%Pc>{;U(q$;VC;f}`V~X~?QQKMr8p!Pg=#K|+|p8g7z=BOmiy zbLXVeea8x54Bgb%Sc20@;5>~u2{OBb9LpLDrDAe%2~CD?ld45(Acr@s?bz<@s@#=8 zCNWigG?m*F>0q=71PDL(oiKHOXyB2BM6)%YEZ@m^d1KYG<5ZR({eyoI3jgttO~(U+l_gUqkfm4k zOg3YVSQJTmJhq*+0{eBtGyeV2BR+)U5_g|^pW{?}Jrg zX22r0KGB!!aS#{FHLNX%9i>l7f|(hBOD=sV5maWwARjTkPln}D70$IjOv6orBCbaz zF?cpAXmvm1;DDcp-b5fwQN^-e+Jx5bwR&0hE!-VtY4WcV2_36YM6~voST_TGfzF!$ujl$yhPYkDuJ&C z+q)NF6eo@?ZXZ4M`@G^?9TM|h&@&dIfj`po6P3qMa!e!+AFHM@7-le<>=;bQt{S?D z!CQLfpXJRPc$)F#I&LwRsTC^e4tcA(y)cKKF<4C2dnD&Fk@}mK+5psv*5U(xgWDZI z067!1heZF#{SwCBBDEALYMLz%Js0ty*$wRQNX~UEn_^^`H7c!MleW{&J>xvw@JL%N z@eQ_Gh8MmzY29#!!NRBp>;9T?KvabJyI8#yYFL$g*M2ZATf|g|Y@xwBSd|7^gNrh> zr}}T3)~GE8)+a_+3y_m_h~Sa`HF)v;AW zk7iP3w`gRi+L|kBQ}XA5yD;uD1#-aZMUDvhMeu#=rA)APyU73Xt@?XQpBpte_vl04 zF~c_;dQCRva#KnJuK)6Jk|KfQJS{8Bg-U@pSi8wet9$rkW`mPyKq?bn4NHMCg<$YJ z5ixOcz2tl|I8zBN5Du(L%Y)Kw`{d0RUOt)AA?M7#^>Qex5BtapXZR-^{B3y%rsP|PU%ief}CR| z93KU9=Po>?TreOXzoykHM@u6TQV+S%95V`CpxK0X4oORxK*fc3iE*T5#`j374dOZM zJlMvdsDu}()N&MRiVUPTl@V?@j(#yoij&b@pY{k(WlOG2r^jpmaU;3X}?fet2@emw1O z1juXQ4%B$7O!c$JbLPVuYU*w!sB8u#?u#-wSTD%n^)67F^xbk4w#a#mzy``~_j=pI z_mqhT0C%zj0MCrUI+lD(=1G0fs9!p#{=5nT-pi31b>TsOo!|pFsrv??jNohP2X_yu zM2FU{DKSx{o*m=<3PTMQ>1FBAXo@U8^W99iD5!2s!>3^oZ?=ZK)8iIC zxc5@nQ)WTUle$=q{l{C|fKyqU4RxcYsA?Pz~80&Q(FbbeC$?Mp1AqUR80` zj8fxjQ*cz8HV`4vClI(aU+HAF3qE_b)hcHLAhQZ9%0r0Zg}>qa>kVw+R2O1tM&9LA zXBbE7w}uLBQCgT7PS{^PR51ExuyMlrntL~9vK&N!3zrgXlon0Rr%nwf6K!P4(9!?a zQvLCF-S(hqauTJO*%lP;OV})zx0loos!|HGCf}Y-Dswt1kQtPkPr%@yUUpnCh9lib zrlhGvep7_kg2Z%FwWWfqJXM@i)^~g*v ziDdA|goSZAX*QIwB@2#(ms~Y7@AOqHqB}a<5|AIPo640S-%-!sEGIel&klP5H`xij$bV*$N=Af^ROi)VF~4Dy6@)SMe?VB#XUVv$9+{j z2{l#z4Vkwx0VYJ|0-coehPMM10+a;MNkzIE&xeh?Lm|dZm!=GilCq{n#3Pa1LMC-U z>KR<I90J&N*_DPbg`nTERE6qeZkmIVdU|Fa$4Pq8aA#VY65Brx_H$QRGazda9@y4qGx%MJPfECC@+o`h}F-zYeXdoma0rI2o8>gN5 z2plwloYISZ-HpZUn(7{E6FI(sESXm$-jvI(XVXqqvL4n4g+7Me(jY@1D?iA-8z)36 zm#7`Bi5ffH{~P=Gu>4@y=JckdXSIT9^*c#K&zXZ0?xV;*l14Lb9o^JCQ;wuiar7f> zbs0K7Az|P;a*OoAVl@qS+kpQn0JMgp{rRy>QP4JyjVq3>t+G~tZ0YZ78GdDSG=;?N z`-x_0_io;0irNtRv@H<5=(%0WE=fe! 
ztI4l@w&{x0&GdJ>z`nx|vcZ~rZf z@Y|Pl4aDMw9M=2iEha!%lj-cwpp@x|AzYR#m=V^8eO2-Y^fs*SwQZO};PyRVVK5 zAy$jdosz6nH2%yLGEFn#+;s;4Qy@r{onUWWq*TyKbQ#!}` zDEcqvia&EsZaQ$`@k7f_tsyuRH@*hEM-@F7*i|49MYhIQ8T47@w9kl-X&@CvBVFh0 zFYquc$F}SeOM|h)^CQZPhu>D(+WT&zehqS&ZR(b+H)JD0$4J1~RPN5dJ+G|(P8OcD zT@XLCv8v@9G&q<&zPJ^?(m9&yFVSHB@J#*phEN>wkP=E!Kzx=~6+s2LP(#rKddmbe@?TD3QTWxE?3J*jtNQZY{qN28K&I@wid?-O z<@?*cA|6{Jja9(b_JefH>&z#zb$Sk6TnE8a8F^r)90ti807As)N%jY6@HuRPn#z`daELQR%Sa$0n zdc3_67XB|Cq`95((69dPpc}LuyNbgrBE1J>Zh9AteWzjav6}l_@VTkMsZRdLK88QM z!vEkPq2>nr6@%}G!T#uA}G?ONwq)%geJX7v!Eh~N=-mOrB@{sDT;sPsr4=pq)@N{v2qchrH4L z^QZy&-@izkk4w*uds=j%Up*#J^v2^})WEwzS*Pl$e z(45}~Bt1ajDEjMrxES?-@~c%SAv;UxT(@Z#S*gWncH znNZf42}aE1nuXa*f@Um-Cz8`AAWhYc2Ei|QS$8J z%+%e^#CG6Fpk3X~?U$H-dVx*6U%)E9EF+a%z4(?3qF`6U0DrfZ0;jZF`uyn|HiwvM zeLlUoCTnVUsGDUwlk>(T|Lf}7`9X@No0Ae8?7mw#@83Z7u}=0Z8EP)1Y}xq^-nj1O zq{>hei^QD?%sV*rd>L<_onWsI_*f6`rUY^jd=K*;y}^1kBmrK|aPx;|d)#5^K*a-0 z3X5|T8OU;2vVTwT?!gp)`F?F#l%dH|Hilyut}%U97A6)2Wgq})1)k6_foz*vI-4=| z6rx!wAA`zr7&u=Zh~gvzLfba*{%r$aLlcK>diE9E%E6Bih3AfU4{0GDJc+Y<>kI9S zJZshXHMhP(%Ng7Vgr_0h;}K$L*--%fDjlS_g?!gu$v*=`jW)nkTF$Z~a|RfGo9;MT z-Ny~>G4sS&dcfjBF6coTmG{cC#O^r&rV#<_`tI%&@n@=?1=c|)!({G61UIwB<|KbH zkqyt3l0LHN(3@{-6s9J)_M1CGQi1(ZvR z5bl6)u%((d{J`?%YlMPXnC;oVg40FoR#W>WOuraOpuq2q89ZCG|)@xZy5vz zRvVhSpx7~KkHOCqBXU#kpZfvF%MtL&yv*>%K}hFWRRX>vWoRh8L>3ri)GhIJa!Y!M zn#CzQvrdAk^j@b24U2>bZg;pb&PW#90@GoBncmbm$NmJbPpc96>k#J>d%+xrh3ylz zIh7??noA(BgYn?2#+!+qFGCYGwQY=KWB`JY~F z6{N?DRjLH8Dsb*Ij$B8e^BlInvWz*Ed3k@apb`lb8=Oq_l%j$je1(YI)s;+w8)2=T za*$muA)Y#wfA?aFK>FhM89@xOo$5aDD6L`9IeG91*XfIaR1y~7pH^!n@Vo~ci2B53 z(pcTiOWg!nzrz2Hs+J=Gy?cLs24f!YsBcHBEg`F9_T82bvzp4jbf2*gKUMxIM5tU! zB5!G;E|mS?LdQ|DhZ5FP)XozEW@}}vYWN#!Ux5<2H!K4s9m!><44Gh4qPtIrgWU_V zHY!2N;}GgD)3vLnE-z!Y-~2%-*TL`V8~NqC-ZnQ0SY3*hQ?5-GWl>DBV5)bTyxn<- zMUg9f*<^N(^RpU%Qy;c<%qMsh!!Q-iUYqP~G=}iXtJHOM7BB0qpjaP@o?u?i!p@fT z;Ote!q@mR|m{I5-sPy$2#yoKnBG>juq!HBWT|T2wDZ7p*k0ZE802pxtTw>?i!_E}jD?Raa*FcSyBGb+??ce4a?&Vrg}npYa0xc@?2fL3+Ee-Gs9DE!zBB9 zoo3|13ErnGnhV%p4U(eUt=2BjVovYK-luHFW}|_g0k_V-^bEY6Vz#=3$Z?Xg)emBt zwn9Q-Mfs!X9WdJrd+e-7kiuY20sTo5IA~7P7MT0AB`IAjCuvpcU{)9;S6$5@$s-0u zas3JPl2ZZm4Lerg%F;B#&@AYbhdITx`dk`cL?UYv{~fp6lHt|j`^LM#C8giGDDs-N z>R}8sehzdUl71aDl3Zb;MiAC%|AVJY3p*i_Ay{%>j|eW;wMJu~Yc~?ZBNDBXRegt; zYZr)w{WSVbgP$S3peR3BHx10jlpak}cAzAMFqt~yqw&VM^NMfY$nUOuUPi!NeErmo$yAQWA5ErJM zDkRbaJd!5RC_6C?f$e^8Zc1;;ed=?L&4^mq)CD7+amq_Axykwnt-JAYjz$aEl3gyZ zxf2p&(ZS*`TXnkiM2(p54RFq-L6XP90gMe$q>ec)K&;N*Me)X?Fos=Mis)s#okY4* z9x7g3#{7u~_Ps|Fd*TGOVc*kB@HS1@NU=7p?QMm%pc*(6_7D#FOM4XMcHAZm%znZL z3}9+kFmvhbUQVRWJi^uL!hVph@`wAuYbw838~DV11hN~cHP9+JVssP)?!U_>2y~QkMQ6?6 zyLgvCrAFV1dFX`Vj^N-->)3z4M`2-fgXhTO=~5VrCL2@I2&8(}RFps?sb3u_=t`Ix zNxf!G(8g@HCgHbMc*8-nPuv%7`rt6IrAKS(>^#)wO)HO+c;bqvgBc7t3ceNZ(+VrY z#r-XI7*wG7)(vi$X`cPn2JtV|M6TyZQPL_eRaPQX4BSSAc~eTdKb&t`i5+A?{gnoV z_A5_@t*|v4zz&BvfsN2a@TY7C3QfZC>z9xIe6`Ug>hNBUHe8vwdH2y?c(>6a)mcM% zC&7fA`S7L>N{+|K6Hz>{NvEbr6X*pKe5NcX_bSd43^1QAC|YhxN+?X(Av){OrzFmH z?AkIMVUF0T_TcWp!kBfO!YWGh+qBkE6F#1--3)h3G4T5kZl<6j{-hKt&NPK!Yle?9 za9MGCR9E#C%K3_u!?T}8rlrp`?9k(ugv-U=jR1a8uvM z$romq6GR-EBPm{J=Zugru$-!Q8HXsOXUUcmHx*A=koVW*avmnjnSRO#rxBm$nN%`a zO3aST(5K||&>*SBE5i?`Jb(s~5Ink8p(c$mps#>U@#wFw@=P-}!QIb_wrR?rER2;C zyKkqb#qbtaPm6YU$}O~-DhK|~+*d;(tS_;$`CfVRLdaewIOX;%-_;eItaeIp(}(2_ zCYhE~8tP8@mjeyp95jsL-p?Y=x4hrjv~)DTBYM}76+w~Mt62LVxd1FWmX&!DzbQ4J z4q|TXl~Y>A^c}N6_I4gN8($_AWp?Upc?p9b|DitgI(DRr%1=q7l3)v-PW~X)nH>Ga z9~Lngx5S&!67#EWO%tUPO=|t%?4GQlOv70fCnppW=|r4DVu3r+;xQQ~#C$vk-@h9I z4!Z$KPPA+gTaiZ)h|+y1b^rdS+YCMR6g$DGY#h#)7_3g(U6?%wgxrtTYue6S;p7t; 
z&+T-(0YsN_H;T2o8N|YVSHV0F+=YHv0f}ng>|vbWE*ob(Rj&o`Z7vj@E%5}ahC{k~ zZ>o(_-vE`ULv-&c2BK9cJSU0}wEG^SRG6(D80+|tH>7&DBS%CyR&w@IdV$YBjBh zbuCkvTA)v3v}5^TamuQ9id=h{>SIT0zsR!5)uD)KZjIWb9MF9UXmQXr(QS)`#4i>;;piLPmzSL2XrPX z`%N;<#mLq5>G2IHutT%lghQS_D7YezP&Wu?V#leP5>enQB+Q>$oG~h{(~|MtYy(r@ zc@J7H%1D&P90LZzqM?GP2CAjinWPPga~u?rX+t#Cabej(!tfx z64Zvp%oj~?aj+yG3FHv-FC5+T{%ve?P4N2Ydr1px(^eAb=Y;!KGqDM9DV}-B78JOS z3FW}lq}EVTA5(&qcRM7VAfwR}LIh{J8^A-zYLCB(`Tv}QqG8>pYaeq^HN$%b9B?{k1fGYkry zFREI7Ih}ZRbL6)OER-I|Y*Ah8g(J2Tcgy>vSYb#F;;NUr^n;o2EOxaS zwWy_=wd=p4YZXNOZuV>HcpYk;AzNs!ySDa4_a1!O&PL=Ddtw|9p<3Zo3EY3#w|dyR(pD8E1VfdMlVzdO`xMH1!Y2e zTDGisS(^AQ!6MtuVzjrJJTi7B3FbZKv`v|h@Xb+gNH@TkuzQaKt}aXC9$PHCnb!%xKn#87_g z#LAMA-h^3g^$I%$j1qn<^47UbKumMg4Ystd= ztPv;o2AeF^EiZ4x%H6+tYY*4kFVJOhSRCFI7$^u`MrzlW#XhrH0PraT zuhIZ!Ug77GKf@ksgIk_UGvrNP!J^^Jg!R=zF2m{p+M}n5hwet7J_QP%dHV$AVAc#f zLpq0+wWEdvvGsrIM4YSpcbf8p(4GfU`1)0e%HI+D&XwDuEuV za;wkv#np{k3UD4Aul{Z;D0E(dBtc_ZP@y^lqFA1ZIEGaBntdB!NCs?0g)&khRW|8a>pD1KwLuYt^ed^h$U^8sHi-PU;4f4qmE zkVy1<3pj&_v;^ZzvGOfiLT^}q^>^rlB1H5bzbJhVTMok&jyXreKCmQ~D06tePSOp@ zGhB*0K_72mWK(Ii?P0kk9S*#cy`5oyoi_%a7L72Tjf_8%@nevcv`B`9fGj~I+# zD%a&G1FoHIkZwXQa{$L}Em4oLW|Vwf>N%27HUtU^XMh&9hl^181(Q-C&z3DlW*;4a z;M?i|4{t<^-*lU6@j6cFdURBkw4f5%6EeMf9Z}A$3>)UH!;S28(bX=S< zWezX57`XwV)?>H`w%g7UWxuHi#a_7N|g5dlLiT zGa)Oz>%#fR;4@9N&=J+Nq~#414#hzG>~%!2Mq9e>eV1b7DvktH)HX2dPAh?C>yRh< zH9cUqQ`hQ%`nw$x_9usn9#1vbOb)lnc(%ZO=*@jH)8FcV%miED1G*Hq!RHu;!-U>e zgygtj=8N@gL2ag%RCi>ngL#>UgGUQ2*v~h>V!X8z9fL@h(&dO0b7a~X_}UE;1T}}9 zQI&zb|KO@XaY@SuonayhC%^4$26%THvI!o11m(=+o#>f2c5vDx#=$w`QalMQldbJw zxd-TCN}y}!_iO=4P`~`b*UnZ4z`rt15oGc>QEWLFD%Z4003uRcdt9j7Q3evo5jq`J zHUyW`6VNcn;UZ*G)CR)7l9m~Ovj(l-g37Z81(gw8JK#(Bz<_(u{0*l*zk7}#Q5sR) z=511|1IQjND2aWjEs+_0vDr1;6pq@s$5%pk$(1sE>9^JQ_W?68zNHs|YJ-op)Txr5 z;mC2V6>k5XmR^9^-Ve%s18B)^$|Cp81nEG7zFzlK8Q4LW!IjzT*@7&m32=S?h7EvmOK|`Qlawc%4VS5cvvs8mZC%dZV|c`YO5N zKIk6h?ABk=QwVh)J!2)+uGAX+i9(X~*2Olth3lg~oscE7Yq0~E4{G5~Thu)^gFj8} zatcx0xoa_0YOd(}D7QPF__TZqJ_)jl?{ch@vzVSKpF$Q94T0Vc(7BugXKmAAi3Xlb zwXtSM-X73jA<<+qW0BP9cHTmd_6>TbE=Rri5Xn42g>F)l{ti$~+$(8$p=UNucDuRD zTXRP3Re5T&5$>Eto!7fZRdyWK98TVvxKiR|d8%{H=$$4vxzDAq?Pm7h9bxy@Y)Z_1 zUY zmWh%qdPlj0|D0DE$AP>;yQVi`+Yj=1#`$NAoG(4qRS2zN3-XjLX>mJMJ1>~m=bt)j zV$p&;tpPW@S*AIy-F9@G{A9duA+8D7Jgi763|Ga^VtTgR0}M~lRnhGD1`_M`R)@@^ z`Y~OFFw8jg!*{{78g5-?oW4SLp-6FSExIu?Z_qpGE6X4@lp7lJ3WFaXt+Bgg)#75s ziwVXi&LZfGf4P~=XtLP|%_TJR*TgxQDVq>tJ z_C9kr2lEL&of_6LJ`BKW6Y7pR6!G@ks( zFfl1b-5yNOD5)%?ib)Tbx;@cZi+es1TbS)_mBrbv-T~}raE;8L&3-=Z31opUDXpV`A-H>%7%L}z-6zllX zR5F_98zhwLaRFG#uFv<1$kIiUGsSun{9MAbgB@jOPfvv&RmVcC9E`c_tR}B=GEJc| zSM8<0)YB@)LxE74VV7wP&PfK{RWjcJ7_O;=cgYS`Okq$sBCH$Xy8h36z;utgc{JG5 zBG8FD;6>G5KHN>9A9CBXmPK3|T~(u-U%5|TM^{zuzv-%CSDyvud*Pz6|yn(_I%cfsr z`>+TY)=*^sNU1MpgO)nqJyTb7bN zXV>z6#@PGDIx*4NfxDFl0c2+ZIE!1Ld3Xgp41Vc<<6-#EZTsWf279V2q|J<7f`E-1 zzwN(b_k01s=a6{^gdX59yOorZwlZB@xy}V3h^?I^TWto{2Cd{i05x9;?~Oy>bNYY% zxNg(IGl!pUT|RuPXhJ}SqYp}U?Q#1Y`?nJtpoxG*9JU-N4nK`dsuKhU%!grO-afOw zZ#kTOiczrlWidBAE&HhUfP2@^P<@yHE#c#rw}93EVHfk``St5_)W#HAq?Tv}YZzGn zjejo!PifUM=&t|9zYSh}dP-b){oOx2+5hG5r6z>_QLFX)pB8=hbgW_|fesyuS67WN zCG^b1rIkSwEax0j)1)wIN*|!2JXiX}2nNC8oA_$dSDy#uYZ%5(XJn7&V5?G&e*m1@ z9cFhG0Q9(Kvb^5Gl*0p0t-)80;nRKd&|$$n{TRu6n&pKG>`VLQs#Y>NXu>X-v*qjf zH;Q$&PYmj(TGrgKf%7VTwT`8#NS^lk-yCR1Y^PAnlH5{o4X+AfQDm(?=TOAK&8hfw zPY_Bf>YGV8Sn8;Vsri@4_GD zeQeN+7G3SqqB{bMZglyt7hP8SU}KKyfK2i#)TSG3N~+A>q`zV$7+`a+KVWzK%M+f& z{OD-3T9ZXbtlW_8;UjFvZY`TM4ao#~ay74_oawF>ADp|Y7$l|S}oAmisUvl8OEqf;5)(`#J*amA|+i~N;yduV5shgIce4wQ%%ZYmT;@t z;OSM?DQH^-Q=b`~p9x04o%&pjSO&+nyCzQP$@$bwS?@GOK$Ha2HE!THeb{BpXYT7~ 
zIaQFhdZQAT3P3S}nObky1fWK;_qHGCBgM>5>U3_0;S@8}iEQi+(?M|CwHVsJeVAoKo?A5L@W+D1-Qbj#jJ&S+ec&Iw>WzO zc2Ne?Bbxi?w|7%nudrf1L4KPYk|#I1nH7yA+q~8M@gLCa3h6V80opHZ^Jt+#?pqtD zch@b)jFjlH#Z;xISh(xa4XbzWAke>P^lfBfOTV*B^pl-+1S++a+_Q>YEN~Z<4oGP5$I9(c-N$qpn1Nm!cNdoL zSrsIx73kv;!s|48>5qr8_r!hWp)N|$Y05Q!e(N4;WE-HrHVwPEp6m4Or;nUF<8w!d z8mfGoxdd4B*}MQJMoCyNy$Jdm8<`qn&jt_;r7hlN+4`$6(e$%v4AM#9`cLz!?f%(%=123f1Rs2wSc{*UEx3PQhW*$ zyV}h%W}gE@+b_{l=SFu7oL&~7hi;VtGX4<>iW8zW9Q7y`2r zhreGdI?;QJVL+fEs}4jLve-s@lVLXD`7&-ad+?ZWq1znjTDs~J#ZLn2RjviH@>qKU z9jw`)=9K|Nwbj)TVBU$+L4Xw_O6$D=G*+!Jml7<>C%E6?*~Y|gyB+$m9jcjJLC2jM zzR8-dy%3=V$DQ=*bVm~alldDw-94V4LC}pX8W^nL_D#+9q3ES)6?` zdU$HaY$YAVQzxEMzgM#-BXzr*R<{X5T+deBPhc3uuj~-O6St_p6xQo~eT(-8b`yZb zP8Z}p1gH0vr*h+_<3Ea_U!wr^+(@P8-$D2CzSOU?N_ z$T%8Cu3sf;0iz3ul)KZFs{2MU#ppLz`=0@~%>ZTQe3X(-rUu=jSJz+K_!Bd4H$8{rdw>Q<1B6`804eAHnBI{`m~7)uB0{Kexg* z+vNxp04H|D^L_kGPm`iT4ZMGNzXgzMzX}pVZ>0ui z$GC~^Zus(BHG8Hc?a#zDbdGrQ@vsWA6@0=fZ4 zr5u)D@z8Hd!N|VNZh1rP?NgWYqA&puF(KEI_!D z##*r9;wv41nw!|IFLsaM0J-QEc@VhEh?A%69Vv91{h4pYzwQs<+ojdS<%r7c#+boh zt*zzv(KJ@WA7JNm*LAB0(@5M6;@7*->|?P#o}s5&UD89Y8HwN&+I%-O2-$d+!9-|2 zI|l1pP4$MOY6ioPm#}Xp0!0c8QREZh0?d%qaO$aq=InTeE27h_tm#==MB@N^yTWAc z`bZBy@$UwkLX>p-4gY}>Z9O#yY!g0` zLE$3Mtl%znz&^pRp2b1wMIn%>iT*wnt+O!a8B3G53K{CCsOVULNk%CupWIzuX;@RGAvca=81J5-rKB@z}5EQ4-nTX<<7R z`Avrs)PEl6gK^;}YA)OHX_MsWpd3JCrBcLf-)~;}bqL186O}3=+eBC3MZ|pfsnT3B zx_cVfD+mm@P?Xn$#`Q;UF@ztQR~TP* z_H$ESVuuzGOnF^}1%n2>*^T4Pzyh0?PummhO>~a`ByrJyi zgXdD(QMI$~0_mDA(>v%YaPNDer08|wX-KO2@U^#ho zlDnov8LnxOdW=9En;^s_eU1RhU!wB>X4IbuBL0qmCQyyZWZ1B2`o*=)nw3X&*wQb< z^m;}4!LU)YwYaIAm_Wjv$h(vhKCx7dXsrA$;9K}l9m8SxCY|OpVmg3!F}_kfrM+ ztUn9fmpiOCqjLJDV6faCw;2j5p^6s7cnrJg^T-th0zP7sXm0)qJB&!t*ZpjyTwlJ2 zohP^kA$vP`DXA&lbeWNIGfG-)8w|!H)b1LK;V$+9HMo@4eK~ZFgd7!K!E1_1+^Y*Ar{nc-**&)%X0AhVdi;q&wexXgQ(d$^_x#i(e2H|8I3u1|u=r=BuQ)Dj&(@O>-EJ zmNKra1_R&*L;<3$yl>23s4dai3uXG8aOBJ#vc^@c1jJllKp~v7dJ2o+>)3|)Kem{! 
zsr{nJaJwc`bD=IMh)@zgQbSGnl>6i* z{_f?iYu-&J%U6PVWQ+~si_B+dX||v+TwIez4!JLCWssm!QoC-rsHM8vMW66UV4tz#?+*y6Jfwim=pV zi4wz2=~KH_7an2(FiiPqexTz0%^y_G-xoSI$M!%4QD1lcakuqbh1O3PUgqZ-Ygayc z=r#gf|LA(}v?Cv&-`WV|w6*z{>d~UB}=7@rJeTRY6r(nbn z0+q+dzuCLe{JsgpbgmpepR@}=tI+<>0l9ZaMP(G`>`xS`@~g#vn|}K9qz5-YW}H(c z)_IvB)^JO?9X$@8_(8k!6_-n8`-wgIL8bifbBTYtp!;UWq+Gg=RhZ>edWz(L^saF5 z&66MM#h_sBB;R}fU&Gg}<2BA=dO670cYY_Zh?sd`#VT_30WA83`M}Qi94vqHX2NTe zhznioekc|Mcwka~RcB|*LEp-|yvB-2?-20tZp$8j0Eq)XKK=V^W(+v>ef1S~w%bn= z@4kG(7Ml;CtPI}UwdgC$mp}=5P199<iR^1N^w{N+ z0aLB1yF9%yd*2Z=wqq9ofaj2o*vp*C4(EcPN@?l16+P0x31#UvaK5lW>yn8Lu<>Q@ zytV;0&T`~uZ2Z?8KKMas_0KbTr)tAEX6tW@w57lPb&U#Dg1|)pxUuQA{6Cj?idg1- znu)P;5x4Ff0e$M9{~irp=oQig9JPMYO0fwoC&^DL|7q#?k9T#64-N-$-I;&)=YLsg zFJoE`0|-`@h}=J}Qh)yfAgvsr8z?$XYVz|tUU5Hm9Aak*UXA25!=M-R z1m6(c&b@b)lBAiCLT2WStkJ9MkGb9+V}@gu36XBT0-c0QM+VA(LNU88-sIx`^9eb) z{E!#xgDUwI=NrFr+=l2d7>pMt2akhxS8y7LWT<5QbNB^xX%5;pU9WF_JoXNTi*JcQ zDalbiwu85(_FpPIb)BOxoSN zx_UEH5zD#of|R3-cgL10OQAUNc>v=@fLvmE9xg36a7%)d88c7vt*bL7%Ybb7XH(Dr z%!K~uW-=Yw_rGhD_n+JLzj6Zm-+0>)H3ag-!}#kV3Fvpq3P4Cp!Qit5z^W#z z^UU7`nXt?OOIQmyizew0MVUD+K83azc~u*M zd2~P{&^P&v^KlkUAMMx!LDqJAAEcnb);~pj8Mr^`rA3x!f^``eHE~!mZ?wv*dM(|;`ySs4%6cvGJx_d2J zPS5Z$Bga(I^w~y4bH%}sH@$nZ3YboRC`$f`2SO|mO+@-68IZ!aBEB=s)4uXEfF5t@ z2{7pF5ZGp@Y1JY%aTv(lku0F9+k*53K-=Lr3XA7Gfs%ZX_r%j%4(EDLNgz>AKV7lm5t{D6EYL9*a!!U{ z^M<=2FnZB*Fz4Oyt%n1;9{#a%HGCS2ubdKJr+VMz zujO#9CcO8I=TgV9o^L?&FbwmcbD)sS((^}QVv+{0+Bt@HVaj&ld#^Ak_TM9BlD;y~ zJ{ok+?p|Ut*Hap9ld-d-`2p4EdW1o^)P-2UySMyiEBURZ$j`hTj-eWBSkee+MlJ@D z+!?rhhM1UzAB?ARVH6p_absy3&Sg{&!xjn8a;h&mXcQ)783X54Mhgne586t+2BAdn z@DzA`G|348I#L9pOmW{QCWG8q=Vm1^AN zYC2@PcI2LOv}aDigtxB48x;Z_W_%-#KabKu$e2CJku4E}owh3gR*~?D&pRmAK#}HU z9t2dU+%N1Z8&lX3gzA|5O&|a*6as+`0y>%b!HUHNt(urDP_&K)MApegNm%@sNbIQ>_5ffS%+cgqQ3Q2)AOiT)tfXg6~Q)0S`BQFera zZyyTOG&FZNP_Vm4V4xk$FR@yE)*s@x7Um#f`{JUn1f^GbgIw2i35*<;LW#zuew@q4 zoNZ)HhgBM(FojHF0A(za=?oQX1?YBUwrMaOF*+1HEe0TZ(-gdZ+F+9%w48^@+4+PJ zWG3#8w#{&3LM_PU=$^1s4GU+9pYR$9f`WoY4zUmStaV6E#MuqxuIL%|A|w!LxEyCE%+CwK*v zjNaxjBjVg0Ag&U!;YTh21fk7uY9x9~p^yvO4N8x^5pwz-{|eaX{G(QoX#s!6p~?OR z*z(-neT7#w^qed8;FSRwOYF{rJ99@tCC5aF9_?0J+g7V)=^Bh7oC9c@QZ`Ym-iCsr z7}NwL>#W-}Qk$(Fvp*vDu1`UBSIQiK)nZG@6^W**Hhp*ftwE_=VW(~;(Q)1tg|&TS z2sDUCTODus+UPa+`F?mg;~Q`b+|U?^KjL^cvI|mh6p#CYa?HnLybTi9a-5oB{RSL} zB~Fk+&4Zab>*L*pDWhr5-Y+&DeT2dYhfye9lg+!y#@51n2P3Av-i)FyF^vL!1(p_7rJ4?b@VaI{ySVRfm^?(c@>QLJ`EOXo;Xt%Yma4 zNJ9Y+3D1A&A<-rkSz%>h(Ax$=<$-nAG-(Mi>M4~0C1x996KI~r&Q--qfg0IKryl$A z{?YKMm4%4fgp%^w-|pp2pqiqDJjp9n?~NI5*&0?vYyk4DYxhdy(~F(C?X91<1)Ei6 z#yBC29a#qxf*mW}9WfhELFgi)uYB_ox4g${g#?>G`Y2IIV|7A0cvCR+FR#+5h>>ze zLm<`sA89@9$I|D)TYZNUTZFUjPODFKE>x^twY{J`Vz^4?Bl>ZB*WSAOp*axe<~K~e zhdtjH-)tnMzV_ulS2Gldc0ygGJ1hX{wtF+?tvKp6f?x&~9S7Rb`9$d}p}E)?Thcia z$ChHME!H7~h9q>B(N(&d-o(L>HaH9hK&Cjj{?LbI0ioi05ttFMI`aVXG+8p9xSOPb zTj?gx*-}tN^#C%7mt{ibf-c zn~AT)HHt5!l8O5yfx7M81|nvHQ}!0mFw@EV4^J`@fQ#YeQh%t3Zc& z) zmjhNHrvIk5fb|TipuWSJ)*KRwD3MPTcNC%}sAP-3A(pJnX}i8-XqRo+srxT4GAQrZ zc`ZQL0_UQ0|8YY$8zCsQ9aQ8O%l``t9UO*mvni}z=(v|O0STntQhsKlfm4)$;b;Y& z8df_ET0K`z8IOsT5+s&FGarJ*?Ts2@y45;Ms&VSgGz=S#at03r8f+_GJ*kFuuiojR zP7vgq2gN0mh5QgJey8F+HfY=8%osGyx{%z`twPAOZQoB~nULfsy|u zM7Kw8GRv@}zF;i!nz{nWDkod;_IfwG35Oi$q2bs%TfVwM%0aPYVj>JA+T!@|rFs?E zGFu5P7^zyliQI)9a{v)zdpWmAbW-e)V}2E1M0fUr35RuUk!^&B^c9z; zi-w~SXAB-zvkOqH@i(xWt{V{!83b??bF8ga7gK4x=YC5IyUfo1^=YAqG%~@UK_d_5 zvkvrV=a{OKsV3bgv!XSn@ZczMAZN(L#B5MpnXqviW+y(raeMt>7MWYW>GgVe zQ)!CJEs4&Jf)y2R0eptXeo9WjQ zl?`9gUJWw7Y`P61c`leb2w3e|%@PaLUOUw_yIKC4P0+6@@+g8fjVqQ!6)#H|E+a8q zL1sA~5cBNZJ>d-4e3kyYBkhe=F@@f1Q^WQit)g}7CNR8nxMkWdg=m1hf!sz1vp 
z&jNze^s!-80-4b?Rv)0CdUcxW3d5G%uS`&+eNC4KcNLWijiOJty^pKM+exU=;z>A> zy8U`KMWJP9bt-1`RF!C3GmTe{!|zjGDMr$G6^iMk3*IMA&8CX&?wca$W^A%Wg0@C76QCx|k+njId>Z+QFEYEw7v&n^^pmmi z!>J?pye7ZZ)w-X^W9ad9-g6a-G0A<_x|XsofPt3Hku5JX4*IHc^QtK(eMCc~k9+ql zXNl#3WpjKzHKC{rEMfMjAWN8%Us%Eb1L~i>rGB+&;KtT!inUhF?HNxYbVR7>u1rP_ z1)|yu#gHATJy!i8BPAB6oEjQra>1m=qvnO+<|Y(?YCRMYq#q#R0Aw(VZe{=u_7&zD zAo+;H-8E!a*Q4|Tm1iVoF#4L#{e2H2vhY>u#?yBzTsECOX#w*1y+>fmMjyAx_f6MA7l7p&Jy<$%>d7*yOJF>w+yIQ?ed3a-I>x8dz?7)f= zt85Fa89YLrZwK#Tf!!oP-uu+J-J#iP zSABMFp^bTR1Yu1>b_|(QLhN`Avyk3h)(FiQXCsX$F<-6tL+=pXx82J4dbFl%-JzzF zRh3yqcJ?7*m0laSg?W<>iLF>*8pGjM~XmjAC4wq1*ZH-j^`KKe;`RP$*KWMILJs z&Qt-GRB#vZWP{5(Es57HU$=#Mgxg!1tqbs>Cae?XUaiQr293uLtKJR_#K2KTNd6M^ zH@W>;sxaZS29lE83@+9ld#0Kw;`k}<|4U3?h_-s1iO#N5` zHXad?+Y2&N>3WLZHQrTWkyY|%bd%=rfFbJ$79IR=y@ zg4hEh!_}57Nl9h3RP6?NIfNJXL>pT4 zhv*WW$}>RM^`+6j->3ZGMi$yntU}D7G=03f6wG@T&X?}8TF3$?DiPz{y%ly1`F-w& zR#U>Cu*lK{S@O`w_9@@XorjR(Mnvssw-=5e6z%b?bG4JOpeZ`B#ZwN!O%%`#YSXQd8J}l<0R&DU82=VTfxyz zskrO5`V2-7*O|1SznNb-dZFeQz^&j~pOr;baTE|k^A#&qu$O9_1lZ}mEI4Dq94 z*@9S)bzdWb^n&YBVV$Umn&afbJCijF(aGvq zBSSRjkF^jB8*6(+`jU;MaFndj44|F4Z|2kZqJb;3!GX=$o@r?C$c6UR89*L{{^fV`*ufhvbmE@J+>QLXsfD<6EPP7K5I5s!~(pl z5-vs`i_t~wTCi&-&DL9;jJT#Qb~LjpiBVAXmQT+C>YSpQz_f8IXF8ZR$hX@QG%+?? zVl+Y^lD^_eVul+k=f%wS6@kLVsWziPwuqX8O{;#NiGLsj8|N@qv05V<_c9w_i=V2e zrebr5W>%W&t~J>)G-eZ06DS^sV)oI=8gAsvx;ybZ>%YbIm?b;tIiAfZj|?Yk_(`X^ z&}(#Ok4@W4=mQzroU{sGooz4xqk|_p{21+*Y9$$cBjoS&ntMb#bMdbvK><+_PEDA! zVMInm2L`~l9cNzo_^NtiA|XM)hA}akr*W zx?u2lwOz$tL+Rq&Ca7i1l$yW9pmRxCUpJnEV$F`d&CJY^fVfP*@RB->@icV-ALH9g zKE|!T03Cq>@K5%3Ka*1$X1n#8f^IahLL04NKz`>01%|)z1YFF=wC_bix}k${C!6O3 z{ODihN=r}gCgZ#6g-Jwg2T=Uh>Iab3bY9t3R>_XvW3@ElFe8ZJoIz)Gaq$kBdK2q| zpRWv+@4E}vKS7C5ZDmk{drv-ec(>_@9M)~;gd`qFZ1LE5skJZ@ zz%I9VA$Ay;B{yL>pj>P3cdC5QNlw`Z=oJ0DiebjPi&dr90LirQ7piweGR?n0G;z5J zl#ut_PYZ32uH^{=2M#zb-zD$8r97p6ip^&2lAT?mv0{y)Q5`nIT0-w0_1Kbe6~OYL z%*4k>D_?V^#Q(seNJ*A1IsA0j^2a+t5gUhGVFtfDdIWy(IigeSHC`f97A}s~8LmVV zE-=I*QdJFdXC962(WhrpuC%44<=`CKa6Zw_()eqsE46xv>{Wxt@=3^0di zBpA=d%H;^Do&UE+S{z8GT3H{K{);!0x-5vOnP4mc)SN(pc&0xi{3BtH;sy9jd;-#f zOUle2-gSXSk|8)|w17ZHY3&b_wX4yJz*@aW3w!a0)4Imhunc5}3hHF1!K$WFI^feq z#S@?(i?|(UwVCG&m^!xf%^*AG&DCUm%f08Imwfc>;;J$jkQ{1cuUh-VJJyj5LLrvO zi_?8KJt$0B5qE2eRda)IWKGca)o5s!i;TVj-wHV>=K_5F)C0>KBmvm&#PqQDw72#x zu~=eEh=RPj8&@)O-DE($aH z#!s3PB#!9I1T9~&@)6+nv!mGxlOrY^lhz^y@3%4OMF$QWl)_eKmm}zV_RY$mV=SK& z(o4h1qMMY~*1n!dEmBi<%D)7bwBRN>K4}VCwCvzxW^mP4Y)n7jQ>S?5=nf)U#e706 zM})+#DLvh@oTw3z&%ADp_Fya|T9>&OhC6`tTR3{w3C2}BL6zNo4y07c#}N)a_IKgAXwk$a zT3yDConwy7;3wn29Ch^_Hpm2|vg2Gu{jgtV-`MO6quFk-BoT@ceXPudL*!fb-oHt{ zojm_7Tig$5wuocQiyV4*(~pEkuda6uFNojpuux+eg`XFgfe#eHr0Bw$7t4aYmb`Lq z|4k-!VNXq(M&bo4AJC#|vK*=M7{;AP@1ATwq(R^xfG&MOic| zwypY0u>wEO0RLam?*BJ3NB;jzuLE06MwXGNwhh!%zr$@r1|OwQU;w8VF}jhRnaF(( z2FHiy4)0Th&3V`b!t%H= zicdFz>Q$Y1=#*VQDZifvvmM%h2r>U$r%?DUQ91Ch%a(t|u-VSX2GnUlJH}s^x%Hjh zE_YXnOAe0FK3lCyTeh#Dv1vm-1TfyA|HID(EKd=?DIL#Oa%45+YixD|iT;EFRDnqz zAZ50*ZhU?82a>9t)s}0VOkvtU9v{W+S;2A=VVi2}dhImj^en(Fu~nEtJ^O!@NyBkT zvCLy=Zn7=!RK?0`u?LWG*Yq({pGeNI9|&0aYIO=wF~cI0tXu`encJYvI!4YJ5n57N zgD^a2;YfHSJxgxFX;l_JW(|tb}S6}>%H{;ORj}Kb%8-eC|sq8hwg334cF^rWcL0I)Px4- zvnXfD4AqxoDV{9JOFV~9f33d!seMv3-3U*b>8;-Khis5vA$;60W)*?nf9T;hkQ3|& z|2~&*wKB{nz@o`c-TAM1ul%%)C*^==UvbWB*Y=Z00km~*Rt<2p+~eOvqa_dnDwLA* zeVk|sKpM%Rzk>DFzD6=nbx%-FmLjVPV8c8^IZaFcDhep5&o%>L?v8`+9}a_3mpr%= zv5vsZBD@SBQ+27uy)kB=ToG+&?JTImd)r{C4j`rtpGNEl#A5LLeD_Kz#JFXAdEYpE zz-Pk7^ZV8Ok3gAOh7SJ1HcgMYo7~E+NRj^rbhsb<>h5YX0be8>q==Jdc@~dTj|HN< zwlYwonuF-R4b&VXdDB5FR+U1S4G>o@u9tp|(r)$kY}H~4ibY8a1otY3<8U*EB9rWU 
zcYrV~AS{I(6I)j8zhMh!H}4KV9SJ8whxtq}0xy`nB2yk%@LlI>whRF3heiiy1AjrYC5n zzDLnGMEP~(?Ly{{(x{5&q{RpgD9{vK>LgJsRv{z5&$1p5TdpVR0UIry)?!dNct|hdkQ@ zxW@-Nvzr2P3YpvR|FQR;VNqsT+qNPqm@PyRP*Ew6ELoAHVyGetb(29Q3RoZ_29lzH z5=Ekb1eJJ0frKhD+A0VJP@*JJqC_PN2)t`M(>+f2%=FCjJ>T)ZKjugGVIQqhb%(vz zUhBNB3v3BeW1oPmdT=5aT!@=I_8ybsUTQNxylDPKAt7TAV_}%Ux{B|*!HL(WstvTn z`)=*ATGN!V$4cD%={={-PS3skOv^8bJDKop%G$#hz$3`iF+2Q)HWR=9wOagJ1)Y?^ zG`iJN$@T`UCNnNqGybND(){yWdbB_HG9TqKmf*)dLUFHGFo~#Nb7Lg!)>@o>DGe9X zd2W4mteo>IyK@1c|ErWm_Q3yXF8~Ge%D=^yE7MXD#O2x&Sm>CP;E5xpN^3r)ax^OTmGWg5BaP zNZg#oN6lU=UQ*saE|8>IP9*U*a^6`ZEKpE@9f;Si;AjyiNYf)kK^gCISk=${(=UNl zacd`mnbS(IK#s@-U_21!{suihiPd1B@@^Qo#K~$stb=S>lkBm>YqMkpwAOxG8UMvE ztC)ioLboD;FYE^@8Xpm!2KQ} zRv5!uJH(BBqV&8VM(969)F0nX()Lmi*|kA}o7IcYZGaqB3tHf{N%+NqFY4G-*acjq z9zLz>fj%%Jm-5sehW_Y9u=_SOan_)!)5yUds-KbK896YawCk`Kh$2I*Rfb$LzGm~Q zxE5_wBlY#1QfGVwZ+M3*lmkd^QJpZI8$|0aGjfVb6>fGDuB4(^U!o8)igpSO-o%0fA|GKMuM&aDZ9}*ZyiNJ z_bl&)?SN89-ir70Hv95|wkjSXIv2ykCz{tsR&3o7{ zdUn`o=yxcvl@1)sepi|zE@V~I;k@3q^fveY+7ap4hthA8)ni=7o4#B_p7f3Hz~SEN zu_`5;Vw8_eKBipnYfY)mZhJVH*r8`==U3Vo;c*PDg?AP=ThIQs7yhf$jcxpr9P8m1 z!CQYk=gJk#yj+<8WAVjobNPIqPb)1tF|p`rvS2(x3@(mlXfbaz%DiN();AB7Zu4BZ z{mh~JiJ4&W-e9qhy3`#KVxso#Ihb{Iae;3)*(^+7piu3)jd@Or*FGgtU(=91L?QBa zog>GoDVYO71Oeg`k|CFpM~RN~L2AC}C`H};?X>jGK$(_GLF(q0aUpCg*KnK)dHB4b zQ+JOmIWF{jiXtx)q@P7+Qd=2(Ujnx)wD4;AULee)EIYRSM1o%{ybs1%cD~mL4ZPn; zOkww2DtF#|KlPPxfOyz1UFE=h$>nsr>}j(>G*{XT$rQz9yjpMccA>D+LHp84UABqM z1{NE;M6)#=o8C7(PE=rx_eoQ-7v$HTtPB_H}j(w0EAMFyV3o9y+Q(+VR(rkL}xD! zV&|^{b81JQ@^2vc72k*r>P<(C=+M@SM-Jg#L>pB+?3AE@K~65OE!AlbE@i=L(Sj$9 z;f?O6Lyrq;%$D`0uDahTBrpYWc6)ohbF6E>aQHHSzxS_dAe2w0Fe8<)SXW2L1>ora zorTkYO?m2d^j;*#gc4Si<@if^m<_9c9aG;Gt0INH3geb}!R@49uEgqTtCOte2;C1E zU$vaOTUsgYIuh~GiSGb&Ho%Vg-hv$qXUpXSQK=l+J!fC%z5hlFj_Un8109KKXDpvdv4`lWq`r z={~&U{WZwf#i9z?tg4VPpRkE^zITp@)h8VDHyV{0;VkY}Bl(osti`XgUPBKJ`6W9S z&Ake@jm|Q|op*Wag*@z)hD?iSg>=XqG-BZo5rYPV_^C z$@Jr((bmejj%A%mbLj;$)V5OYIkZIXakUNPoez3v%$q=-(oHfbSFytgsSE1=p2pd; zg?Meqpg7yS`^T#FmWi3y7AHT%g~^RNIt}X7MPZVn?wD(s z8{bZiE;0L%iSOK#xra#fZ|^HCG)ev;+lUHey#6sZthFmia2;P&T-kNoPv1+=_Ibb3 zwy1$^G&ych+CV7Igp;ELGN?4TG5YibBpD0NF; z6$t{M?2#0JJi~d2TIBk$p)|h)+#@ZyjQuceHI1J&E=0gTJ#ID|);v@0i3bmqDsCU{ z8gIQn7cWg0EjJOSRB1a+s5{mvB;XGTRN_wCK_}X7><$)=VRtE$wGeSqXJLp{3fmVk zy2;tPq4KzMXZbOA&2TwPL?`<=r~gRgq2{`*$%>JsG*wcu_n`S$ca_Zg8m(|gqPZ$} z<38u0?h*L;>5|aw9ZZx*I{I$s$ECFkl{ceU?#bg<~qH=FbhH76P@?ErA>}?g(Rl< z-3U?8lpj@Lo8Zm0c{SbHYpF@{C3NuKCb`(9N)ugi?PMHN1gA{CN|MTx)-*lg;) z+J9tsWKK%f>xQ=%@Auz-hUEnqUawNKD88rTr0<0h3;2vDmbi=1^oXmw?i=h0$&++jj1T$+59$ z#-%3RA@9~a*BPyxl%NKg2?8h4E6oIO{t-A`X?#$jLN|(JGsVJwq@ibK?Mk@d6-CTn|%BB zRS}^>*T9mU%?^d_qFPj+U&aJY4Izqe7YtFCuW%8ugh4~GsmW__w)I69~uYAmXo>VeFy(DEm% zGf`YA77$*aI^0BmQ^GjV#6j<+&zCUPZn38AB&DvUpSP^huF&4&iWi+hPYWfxfvXzi zOZ7gfuE&_NnXP@FAcY`y8gtf`o)>*PXaJXQkpPL{bqgkr7V5*CgjSe?6i^R#aYo>kxSd|c}19? 
zyli!6?uIlu_Y#(RR2aqd&i6`JcgU)@ol z2fBQ6;|(M~51$aM0RKFr_M&&2`(`g?5A_R0l5b)1<;Nc@O}{r^pYCJW##{3Na;f>Q z3my$oqTBQC&V-mHzB%#|;SVkC)1i5y4Oxw+&3Ic4N(xXnw@q27xL4!(hG^CP7&zv*+`tcWr zNNHnMuZV!*t}HwlpH0C(;My6?mhGyu?K~&+v;%l;-b{k&EY2U`@7wqq5KwH|^-z=U zavjztJi@R*F~sA=nbs*^HvX3R2Uk$|m&WyJ5CWP{?_A524Po#0LqgYP^i-kK*+9sk zjo(qQ6EY3h%yfkak5p5ogcwWS8hE54K7DwP%{)@dxV)E><+;i=Zpp=W6(pA372T$% zpYKdO5xkhRWvxZc#E(GXVC8Gi{fOGHnDelF-h-IsgnCFK<9c{ystiQG&dmY!F&HF} z(aRs@1i;%``z}$(6XA!6J*4ee=&fYx!xy)AbR-O+{LS>|X~QYler36KhYy5U z93-8!4cS1*wpN%l82I-+bRxFm zPm`j1v&d0JWo0SkU~hIEc3EGVT!n184(lV8uFm8uGk73jB~h4sh#PXr+VA7Fg737h zJ2K=iiil2UKIvPi z_yIUFD7@GYms7%zt~P?_+*_BH4{e%yqaCEj<`c!orm3F{BP`sYF(^|R?{J7m2H1o^ zk-svT32WWO%x$LFyI zn)mJhGR%ctPg)PtYLKJQn`~n&OipfO07XQ6kCy7ghbZ7HDL=AN8s&5~B zknCYPtPnAA;OOXKX3Hs}|C~EOc7q8bw^Pje!qj@>j z+1>qjcDW|!7eM-DIrH%heKxdT!HM;(eW&qyw0RYP%NUpaSq=j#nqyF(3(!u$A2<@? z0iVtm9zg8yZ71+oH)HC~Jxya(99l259eS$bu;snJn&$}5p7>AC-fp?(T=9SWJrjoW z_%-9sw!YZn(Eg##{FBRH4oS%5tTpF*q02BE{P^J)kM3>dag8|aZw@~*1Ae^akqD2S z01}NgKkBUhvx~gjm5&s+T>!RRfB!fhEj4)RrGGMceXEUrn09xey(yZfh}{vUZm*0> za_rm#KH5f1-44xYF7*89H8+`zTjQs%>W`j+h%VnH`>hrp2L7RS9I2ohF0TORsfo>7 z6b>fbUE<4b_Y<=R&jt){k}93rOVEWi-DS`sAFQC-_51`}bOzL(IcU&zD-dp`{>Hv% z3y(T~Im5Go4La4|k8$zu4B!ZL_1<_h_~^lfNK>ivLaxxLZ|qNRYXtBjW%<#6d?HQN zJT*Jf&*OUy`eDomt^78mE@g7taB|r}vs)WdcQUc8JiXk1{oFIZl@C3Y)tD>A$9GUI zYWA9`jj$i(KE%0v=A_HJN~oJu6kMk+%r4i~XodNV34nty_j{xJL16tHnO{b6jD+gA z+yB~#-N>+8xm08GS3`EY?x;&^m?z|t79l_#Icg~`UF7r~GEgiwlwa1IyZK?o-JQWo z@L}Q~-<8qbPPQWyPqyWY{MHGjA31scDK1C{n(?Vhte)iNXfBBFr#i7zDx zH8L}gk)H5dxqXeV`Bot#^ueuSvtut88$^p#Y@+r?FJEf9J2!#TKE5*UQ-#l+gXqO{ z5I+)Ac!-T__a*IZ24#*flDf^O26ayn`95ML<0MQ+6!PvecS z%47SY)-W61gN4EWX2ORh-z6=3V&G(OPXC6v6s#SYu<`fGmB{SB=k>c+H)*I4T5+8B zVWgXQ%r6ce6`b{)gDwhx*ud)vHWTmq7vdD%U4pyJwb1!^tM$E4;Foxz(P{r%n1@Oq zI+Yw{l{4oG(@JRxGapvZM=W1Cx9ZC0Y!Cd)hcpOHz6wWO0eDC=*29{q4sVi8ZAO{c zqME^s6-;gMztS>c$FFhcs%S~Oall6S%kG~xLLS%TzeEi(Gx6#QZ)E8Xg|$kti*qpJ z^7z)jtRk=<9S(~!k6^wW_&UV?viojVpDk=hnSmUJa)`sZca7%|Y`a!37$I>pj~)|j z@4$rS8u2cg1dk5tWbZ{`Z=Nd75m|~bHaLEm%wpx$3E86&u_K~1?=;Z zS~d1~;(_N-)OJ-=Mgb}K8!Y-=UE^gdf4r#h~KrDz3gMN)fw+E>CY9Nqcf zTln8w_}{ni|Dm_ATYKs|Otk~vLQ^7qy8v1MaK0y{2GWP*t|%dHK9YConCxa2SQPl65SOyi?TEaZoKbVWHP1%J{pJMPNE1}x;6uEQe|ms0=ud{Cc>g&S-fnoP%12?_9-J>-P#$km*ryP({ME ze6D=UNob|>j#@=;UHUYM*XS!-PgsT6;T;QW4n{_XnwuO6U+ z73$1M!1*XM!Py}Kp5$MPKV?s^k=z?BQyg*kq;8WNq>r>AKQ9z* zutLo7?ZK6WA4*+|O)fSQ(|1a}|MJ9hnaT7>JK_Rjs+{3*Jk|sOU6)1*`w)Xfxe#dE zZ0y{C!JN6w35J4JN17Z45>eb2(!RsrVwpCre$$Zv*XN$2bz}cVIL5jN+ui54s4=`n zF!xekfXmaPRPlNP(;E`tSNm=FPr~u?FA!1iS|}gR7WmW)LHY2G=Ujf*gry_&vpt7WTd!!i?;J zzqy1A`OpNt=u7WsW%Cb^J{Fr;S{-|!X7A0UNYj=d!E@kjc9(9qv>_qowM=vGI1d@i&s8>~y}Oi^Bs)12{gpOb%eR zcm39bpUUHfNwMjU#f7H%xsFARk_G724@0|aC$zM07ed+*-{?262&9dCBlksK-ni}- z0&E|yk_~-x4FFroYnPd9fMM~a$QaYU%dvj)IP!7=pk=5fd|wN{|z%*M+( z+m<>fgsR4B9^PUbkk;4)_Izi|9Mg4aoZRN-hs+z>05k~AG#Z#?HJCbkW}Qa~{@hTw zS4O4};sFc6I_?QzE=5Bf>n%m}*9Bci*MLPb^mQ=023}mJn&29&@ZNcl=McIED`Eu> z^BjhCWb0E}f*lmxOE^Mv8dqQ{;vw|GFB-*S4E&!>_gRM9Kp;dzwXOY zY=M&Wa;?n!Cp5rH8FU8jfxqkl6scE3VUm&$rj;DRtf!#gOLtE@lsbFD`DknkTqdw} z%hfP@4{T5mA2hGn*@;mf!1aSKS`@$)WY3qLE+1Wg$uSS?yHAZhP$`Ae;j2tXu-+<$ z*~Ke5czCaCk{KiHla7Zr_bVzGU?SJY3)Gf8#~P)@66(n;k$mZ!h?x^en}-1vb@d1W z<>rCnaCXuv*9RnMqs3R5lsf?061%3C6sY$yiU3ykH!!b7?6Xv zmd2Vi8+G|LxI|uj4M)aP?j%t_f8%AK+>wj1iIexu!4h;`%rp2z4t=-$7c!~sz<_;$*`Qd= zz*U^N#hLCyZ$$1KMp*-M2%cLl{OH;n^_;}Ul8Us;Um#IW zW3X0lO0CmiRZQX%J2%60yf(c6pG{sE5ptE0xrtiW@XlGGxZ!Nu*je}cg{gvBjg|19 zHjxnd01Q&;SHLuY6JyDJjf#qD56Q9*BP^h!( zt_sQ5p75A=VFph?_H^?`*FQcw{j3c@&QFa1r@7>${IGfr<#aYNIi}uuH}OE#ZI3k} zV5}=$)ay7E|K~_q(4V^OhnL 
[GIT binary patch payload (base85-encoded literal data) omitted]
z8IWS65jw9?baFWMVn_ALS#uRhBSZAQx0ggQ<;sV~&D@_!C*ExQ0Q}^cuk!RpVacRc ze5|0=y(}YoZlUZ8D9DC4*8(NI8R=iDi=yPdh`D+C1_DWNz;Xqf;^QdZ&p&&h~?}^?^RfsGj|1B zcHbS>mU{z+Q7;CQ+rnmoIm)bAYj!TYqF0$)oF2Ox$yV&c5{KyiRR{*t>u}!H1eVJOasD!Yvw)fFmlfBC15` z1<9$*7Z?RC??@5!70h3t>y&x-^lX$y>QI~>=D4*{iZCB;!}b-hSNq~2BNZ`#DY3R5 zLfKF&Pm2}-rx~K^=JqpB3HN>klB&s342+_pSz z6CchxkEuqN-WtX%J}b^jPT&K>aq@F5M60(8>fkQqt&4WYAHjD_V)O~G)AB%D@wM;0 zHkh$X#j00+LKM5z)6m_Ha{A_jI)qF<#X{&n=)5L}Zp%Hsp@V^tXeD-*>Kvo5!fd`` zdi$_E{k6P!K02Kxs*Y(6g<`GAH;d1P9r zg_{NbE&X~m`P(&uT(aqknUDc+8L_dK`Vw=rgyy;OUF@v6II}bIsg$s_Gx!;m{BnVu zWW|@5OV(1=MBS$=Ja0}H4P!%{(mqBtR=qx#K2(HD%}pJibR!L7_9+BIS+BHRP6#Dl zjfGWu!*?RAO0#)ok0O9c?8fO{0>BoTU&u-HYK5foSs=U3x2^rw6D`*UF_=>L?xc3C zuL}2Xmo)Q=NM9Mh39zShd>$ZB`3ythQYtf2b~7XGIcewIwx+$%e0z1MY=f+Rf$z}D zi&HzmMim0o@C{(L^YZQ<=DFdY=K*umJil-~Mk`p@2d0hxT0Zy$Y99fs>Wb^Y~&yd#&XHd~SbhstEg z+wnxW*xsi!g_|s&IRKjO?A@d!8eBl)iJ!Rn6^x*ng;0Li=!`Mu7=*Wz$O5VZHH5K7 zw=6Y8fACknQX$(L3Vx>}h5M%Ni(10vJoWXv%~T&tk~7#CJ0E9f{&jA8CdwK+Tg{mQ zd=n%#`|4DKsuBMzM-9yQ_1vJy5sYAazGh;}+xE)BhTcS@gb`$fDE7|KJ0`V1_n2ms zcB_@*BQnPs?6AUkJ0nh5wd(EI?inUlH23`>J(P5Jr#INF963~i3BM?jS=&3GUTk%W za11;Eo6!B%Q}GMIWK_K3WLS!$XdwOk;%sN