mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-07-07 09:02:59 +00:00

* Apply import sorting ruff . --select I --fix * Remove unnecessary open mode parameter ruff . --select UP015 --fix * Use f-string formatting rather than .format * Remove extraneous parentheses Also use "" instead of str() * Resolve missing trailing commas ruff . --select COM --fix * Rewrite list() and dict() calls using literals ruff . --select C4 --fix * Add () to pytest.fixture, use tuples for parametrize, etc. ruff . --select PT --fix * Simplify code: merge conditionals, context managers ruff . --select SIM --fix * Import without unnecessary alias ruff . --select PLR0402 --fix * Apply formatting via black * Rewrite ValueError somewhat Slightly unrelated to the rest of the PR * Apply formatting to tests via black * Update expected exception message to match 0d81564 * Satisfy E501 line too long in test * Update changelog & version * Add ruff to make tidy and test deps * Run 'make tidy' * Update changelog & version * Update changelog & version * Add ruff to 'check' target Doing so required me to also fix some non-auto-fixable issues. Two of them I fixed with a noqa: SIM115, but especially the one in __init__ may need some attention. That said, that refactor is out of scope of this PR.
40 lines
1010 B
Python
40 lines
1010 B
Python
import json
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from unstructured import utils
|
|
|
|
|
|
@pytest.fixture()
def input_data():
    """Provide two sample records: one text-only, one carrying nested metadata."""
    plain_record = {"text": "This is a sentence."}
    scored_record = {"text": "This is another sentence.", "meta": {"score": 0.1}}
    return [plain_record, scored_record]
|
|
|
|
|
|
@pytest.fixture()
def output_jsonl_file(tmp_path):
    """Return the path of ``output.jsonl`` inside ``tmp_path``.

    Only the path is built here; the file itself is not created.
    """
    target = os.path.join(tmp_path, "output.jsonl")
    return target
|
|
|
|
|
|
@pytest.fixture()
def input_jsonl_file(tmp_path, input_data):
    """Write ``input_data`` to ``input.jsonl`` under ``tmp_path`` and return its path.

    Each record is serialized with ``json.dumps`` onto its own
    newline-terminated line.
    """
    jsonl_path = os.path.join(tmp_path, "input.jsonl")
    with open(jsonl_path, "w+") as handle:
        # Serialize every record before writing, then emit them in one call.
        handle.write("".join(json.dumps(record) + "\n" for record in input_data))
    return jsonl_path
|
|
|
|
|
|
def test_save_as_jsonl(input_data, output_jsonl_file):
    """Records saved with ``utils.save_as_jsonl`` parse back, line by line, to the input."""
    utils.save_as_jsonl(input_data, output_jsonl_file)

    # Decode each line of the written file as an independent JSON document.
    with open(output_jsonl_file) as saved:
        round_tripped = list(map(json.loads, saved))

    assert round_tripped == input_data
|
|
|
|
|
|
def test_read_as_jsonl(input_jsonl_file, input_data):
    """``utils.read_from_jsonl`` returns the records that were written to the input file."""
    assert utils.read_from_jsonl(input_jsonl_file) == input_data
|