mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-10-10 15:44:31 +00:00
fix: relative path / permissions issues with v2 fsspec connectors (#3186)
When the v2 fsspec connectors generate the relative path, they may currently produce a path with a leading slash (this happens with the Box connector, which is a subclass of the fsspec connector). When this happens, the path is unintentionally treated as an absolute path. As a result, the ingest pipeline attempts to write files to directories at the root level, which in turn raises permission errors. Note: the Box expected results needed to be updated now that the connector is no longer failing. Aside: found that our tests were unintentionally skipping the `box.sh` tests because we intended to skip `dropbox.sh` and we used a regex to check whether a given test is in the list of skipped tests; since `box.sh` is a substring of `dropbox.sh`, it matched as well. This adds changes to force an exact match. ## Changes * Strip leading slashes during the creation of relative paths in fsspec connectors * Add expected results for the Box connector * (bonus): `make tidy` altered an unrelated file by removing an unnecessary `pass` statement * (bonus): check for an exact match for skipped ingest tests, which fixes the Box tests getting skipped ## Testing The [tests](https://github.com/Unstructured-IO/unstructured/actions/runs/9461928289/job/26093475612#step:7:2085) for the Box connector were failing. They were accidentally being skipped (see changes above). They are now no longer skipped and are passing.
This commit is contained in:
parent
c2065db716
commit
17bc55e7be
@ -1,4 +1,4 @@
|
||||
## 0.14.6-dev2
|
||||
## 0.14.6-dev3
|
||||
|
||||
### Enhancements
|
||||
|
||||
@ -7,7 +7,8 @@
|
||||
### Fixes
|
||||
* **Fix passing parameters to python-client** - Remove parsing list arguments to strings in passing arguments to python-client in Ingest workflow and `partition_via_api`
|
||||
|
||||
**table metric bug fix** get_element_level_alignment()now will find all the matched indices in predicted table data instead of only returning the first match in the case of multiple matches for the same gt string.
|
||||
* **table metric bug fix** `get_element_level_alignment()` now finds all the matched indices in predicted table data instead of only returning the first match in the case of multiple matches for the same gt string.
|
||||
* **fsspec connector path/permissions bug** V2 fsspec connectors were failing when the computed relative filepaths had a leading slash. The leading slash is now stripped so that the relative path is never treated as an absolute path.
|
||||
|
||||
## 0.14.5
|
||||
|
||||
|
@ -0,0 +1,331 @@
|
||||
[
|
||||
{
|
||||
"type": "Header",
|
||||
"element_id": "3cea98cfe0d578669abe2c435f9f50da",
|
||||
"text": "US Trustee Handbook",
|
||||
"metadata": {
|
||||
"header_footer_type": "primary",
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "5209312022a75a31d95385fdccff68fa",
|
||||
"text": "CHAPTER 1",
|
||||
"metadata": {
|
||||
"emphasized_text_contents": [
|
||||
"CHAPTER 1"
|
||||
],
|
||||
"emphasized_text_tags": [
|
||||
"b"
|
||||
],
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "22a23e29022f32945965002cd734a8f0",
|
||||
"text": "INTRODUCTION",
|
||||
"metadata": {
|
||||
"emphasized_text_contents": [
|
||||
"INTRODUCTION"
|
||||
],
|
||||
"emphasized_text_tags": [
|
||||
"b"
|
||||
],
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "4c175cf543957acc4420221de28d3fca",
|
||||
"text": "CHAPTER 1 \u2013 INTRODUCTION",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "77022a5264f552b223538977cd40f640",
|
||||
"text": "A.\tPURPOSE",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "8e9d0514cc08b3b0898cd4f165d8d188",
|
||||
"text": "The United States Trustee appoints and supervises standing trustees and monitors and supervises cases under chapter 13 of title 11 of the United States Code. 28 U.S.C. \u00a7 586(b). The Handbook, issued as part of our duties under 28 U.S.C. \u00a7 586, establishes or clarifies the position of the United States Trustee Program (Program) on the duties owed by a standing trustee to the debtors, creditors, other parties in interest, and the United States Trustee. The Handbook does not present a full and complete statement of the law; it should not be used as a substitute for legal research and analysis. The standing trustee must be familiar with relevant provisions of the Bankruptcy Code, Federal Rules of Bankruptcy Procedure (Rules), any local bankruptcy rules, and case law. 11 U.S.C. \u00a7 321, 28 U.S.C. \u00a7 586, 28 C.F.R. \u00a7 58.6(a)(3). Standing trustees are encouraged to follow Practice Tips identified in this Handbook but these are not considered mandatory.",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "6647ac00520f9b8dcf37f1625d008a69",
|
||||
"text": "Nothing in this Handbook should be construed to excuse the standing trustee from complying with all duties imposed by the Bankruptcy Code and Rules, local rules, and orders of the court. The standing trustee should notify the United States Trustee whenever the provision of the Handbook conflicts with the local rules or orders of the court. The standing trustee is accountable for all duties set forth in this Handbook, but need not personally perform any duty unless otherwise indicated. All statutory references in this Handbook refer to the Bankruptcy Code, 11 U.S.C. \u00a7 101 et seq., unless otherwise indicated.",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "60220f2162f5d83e2af6fc8d144bd429",
|
||||
"text": "This Handbook does not create additional rights against the standing trustee or United States Trustee in favor of other parties.",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "e341ffc123dd2827638aba18149c4175",
|
||||
"text": "B.\tROLE OF THE UNITED STATES TRUSTEE",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "3a6e7cf9f42299fd056a5a7a1279753a",
|
||||
"text": "The Bankruptcy Reform Act of 1978 removed the bankruptcy judge from the responsibilities for daytoday administration of cases. Debtors, creditors, and third parties with adverse interests to the trustee were concerned that the court, which previously appointed and supervised the trustee, would not impartially adjudicate their rights as adversaries of that trustee. To address these concerns, judicial and administrative functions within the bankruptcy system were bifurcated.",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "4a3de42983fb56345c598326c3732769",
|
||||
"text": "Many administrative functions formerly performed by the court were placed within the Department of Justice through the creation of the Program. Among the administrative functions assigned to the United States Trustee were the appointment and supervision of chapter 13 trustees./ This Handbook is issued under the authority of the Program\u2019s enabling statutes. ",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "1b11ebe52652656e0ed8c12e5969de9b",
|
||||
"text": "C.\tSTATUTORY DUTIES OF A STANDING TRUSTEE\t",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "5820e4e6e72ffc7a9f962983c727f9a9",
|
||||
"text": "The standing trustee has a fiduciary responsibility to the bankruptcy estate. The standing trustee is more than a mere disbursing agent. The standing trustee must be personally involved in the trustee operation. If the standing trustee is or becomes unable to perform the duties and responsibilities of a standing trustee, the standing trustee must immediately advise the United States Trustee. 28 U.S.C. \u00a7 586(b), 28 C.F.R. \u00a7 58.4(b) referencing 28 C.F.R. \u00a7 58.3(b).",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "NarrativeText",
|
||||
"element_id": "3bbf318afaf932ebb9f5e9cf1b74efa2",
|
||||
"text": "Although this Handbook is not intended to be a complete statutory reference, the standing trustee\u2019s primary statutory duties are set forth in 11 U.S.C. \u00a7 1302, which incorporates by reference some of the duties of chapter 7 trustees found in 11 U.S.C. \u00a7 704. These duties include, but are not limited to, the following:",
|
||||
"metadata": {
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Footer",
|
||||
"element_id": "64a3d9e381082c0d1977ae11f4c40cf1",
|
||||
"text": "Copyright",
|
||||
"metadata": {
|
||||
"header_footer_type": "primary",
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/handbook-1p.docx",
|
||||
"version": "83125548004193369404829885052395764226",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874451.0",
|
||||
"date_modified": "1688874451.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,24 @@
|
||||
[
|
||||
{
|
||||
"type": "Table",
|
||||
"element_id": "32bc8af17151389d3e80f65036f8e65b",
|
||||
"text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. In the best case, exploring such gaps yields\nwhole new fractal buds.",
|
||||
"metadata": {
|
||||
"text_as_html": "<table><tr><td></td><td></td><td>January 2023 ( Someone fed my essays into GPT to make something that could answer<br/>questions based on them, then asked it where good ideas come from. The<br/>answer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,<br/>or missing, or broken? You can see anomalies in everyday life (much<br/>of standup comedy is based on this), but the best place to look for<br/>them is at the frontiers of knowledge. Knowledge grows fractally.<br/>From a distance its edges look smooth, but when you learn enough<br/>to get close to one, you'll notice it's full of gaps. These gaps<br/>will seem obvious; it will seem inexplicable that no one has tried<br/>x or wondered about y. In the best case, exploring such gaps yields<br/>whole new fractal buds.</td></tr></table>",
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "text/html",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/nested-1/ideas-page.html",
|
||||
"version": "77943175838335685751163845636763163681",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874401.0",
|
||||
"date_modified": "1688874401.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,24 @@
|
||||
[
|
||||
{
|
||||
"type": "Table",
|
||||
"element_id": "32bc8af17151389d3e80f65036f8e65b",
|
||||
"text": "January 2023 ( Someone fed my essays into GPT to make something that could answer\nquestions based on them, then asked it where good ideas come from. The\nanswer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,\nor missing, or broken? You can see anomalies in everyday life (much\nof standup comedy is based on this), but the best place to look for\nthem is at the frontiers of knowledge. Knowledge grows fractally.\nFrom a distance its edges look smooth, but when you learn enough\nto get close to one, you'll notice it's full of gaps. These gaps\nwill seem obvious; it will seem inexplicable that no one has tried\nx or wondered about y. In the best case, exploring such gaps yields\nwhole new fractal buds.",
|
||||
"metadata": {
|
||||
"text_as_html": "<table><tr><td></td><td></td><td>January 2023 ( Someone fed my essays into GPT to make something that could answer<br/>questions based on them, then asked it where good ideas come from. The<br/>answer was ok, but not what I would have said. This is what I would have said.) The way to get new ideas is to notice anomalies: what seems strange,<br/>or missing, or broken? You can see anomalies in everyday life (much<br/>of standup comedy is based on this), but the best place to look for<br/>them is at the frontiers of knowledge. Knowledge grows fractally.<br/>From a distance its edges look smooth, but when you learn enough<br/>to get close to one, you'll notice it's full of gaps. These gaps<br/>will seem obvious; it will seem inexplicable that no one has tried<br/>x or wondered about y. In the best case, exploring such gaps yields<br/>whole new fractal buds.</td></tr></table>",
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "text/html",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/nested-1/nested-2/ideas-page.html",
|
||||
"version": "293680985726204769765169474511274942733",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874389.0",
|
||||
"date_modified": "1688874389.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,288 @@
|
||||
[
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "6e8d4e8762e9bec346ce9637a0efec16",
|
||||
"text": "GSFC: Sciences and Exploration Directorate",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "Title",
|
||||
"element_id": "4cfab7f060a22db4c90b2d0f75f72d5d",
|
||||
"text": "Virtual Machine Environment Scorecard",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "0f0ae8b6e925e38d5882b7881568baff",
|
||||
"text": "Code 600: Sciences and Exploration Directorate (SED)",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "2cee95c9665ecf4c0ef061df2c84269f",
|
||||
"text": "Code 610: Earth Sciences Division",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "d462a5a89ab41204621103f923cb8816",
|
||||
"text": "Code 660: Astrophysics Science Division",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "6e72804e49ae9ddb285026b56e8a7c21",
|
||||
"text": "Code 670: Heliophysics Science Division",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "84304dd1bf3a8501d42c7cf06a10fc36",
|
||||
"text": "Code 690: Solar System Exploration Division",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "606d67afd6257571bd3c1c9be25bf7d6",
|
||||
"text": "Support offices",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "add73d98a4221ed85ac38e4b2ba85f28",
|
||||
"text": "Code 603: Administration and Resources Management Office",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "f372bcfbba68a3fcbb1e3bdccd9f6c17",
|
||||
"text": "Code 605: Science Proposal Support Office",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "fd7b9fb95274e0dece5aedb70e2c2296",
|
||||
"text": "Code 606: Computational and Information Sciences and Technology Office ( The SEDVME project is managed out of 606).",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "0463455f2b596a1181ce682c503431d0",
|
||||
"text": "Code 700: Information Technology and Communication Directorate",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "ListItem",
|
||||
"element_id": "ce3c2479c2f2e0f0e2ede14da592480f",
|
||||
"text": "Project management help, CNE, Zoned Architecture, IT Security, Production SEDVME Service Manager",
|
||||
"metadata": {
|
||||
"page_number": 1,
|
||||
"languages": [
|
||||
"eng"
|
||||
],
|
||||
"filetype": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"data_source": {
|
||||
"url": "box:///utic-test-ingest-fixtures/science-exploration-1p.pptx",
|
||||
"version": "309546934335254463247992132065898582121",
|
||||
"record_locator": {
|
||||
"protocol": "box",
|
||||
"remote_file_path": "box://utic-test-ingest-fixtures"
|
||||
},
|
||||
"date_created": "1688874428.0",
|
||||
"date_modified": "1688874428.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -110,11 +110,21 @@ for test in "${all_tests[@]}"; do
|
||||
rc=$?
|
||||
if [[ $rc -eq 8 ]]; then
|
||||
echo "$test (skipped due to missing env var)" | tee -a "$SKIPPED_FILES_LOG"
|
||||
elif [[ "${tests_to_ignore[*]}" =~ $test ]]; then
|
||||
echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG"
|
||||
continue
|
||||
elif [[ $rc -ne 0 ]]; then
|
||||
exit $rc
|
||||
else
|
||||
# Check if the test is in tests_to_ignore
|
||||
ignore_test=false
|
||||
for ignore in "${tests_to_ignore[@]}"; do
|
||||
if [[ "$ignore" == "$test" ]]; then
|
||||
ignore_test=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
if $ignore_test; then
|
||||
echo "$test (skipped checking error code: $rc)" | tee -a "$SKIPPED_FILES_LOG"
|
||||
continue
|
||||
elif [[ $rc -ne 0 ]]; then
|
||||
exit $rc
|
||||
fi
|
||||
fi
|
||||
echo "--------- FINISHED SCRIPT $test ---------"
|
||||
done
|
||||
|
@ -1 +1 @@
|
||||
__version__ = "0.14.6-dev2" # pragma: no cover
|
||||
__version__ = "0.14.6-dev3" # pragma: no cover
|
||||
|
@ -198,7 +198,9 @@ class FsspecIndexer(Indexer):
|
||||
raw_files = self.list_files()
|
||||
files = [f for f in raw_files if self.does_path_match_glob(f)]
|
||||
for file in files:
|
||||
rel_path = file.replace(self.index_config.path_without_protocol, "")
|
||||
# Note: we remove any remaining leading slashes (Box introduces these)
|
||||
# to get a valid relative path
|
||||
rel_path = file.replace(self.index_config.path_without_protocol, "").lstrip("/")
|
||||
yield FileData(
|
||||
identifier=file,
|
||||
connector_type=self.connector_type,
|
||||
|
@ -160,7 +160,6 @@ class BaseMetricsCalculator(ABC):
|
||||
@abstractmethod
|
||||
def _process_document(self, doc: Path) -> list:
|
||||
"""Should return all metadata and metrics for a single document."""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
|
Loading…
x
Reference in New Issue
Block a user