unstructured/test_unstructured_ingest/structured-json-to-markdown.sh
qued c7c3e3c082
feat: convert elements to markdown (#4055)
Creates a staging function `elements_to_md` to convert lists of
`Elements` to markdown strings (or a markdown file). Includes unit tests
as well as ingest tests and expected output fixtures.
2025-07-16 14:34:29 +00:00

27 lines
785 B
Bash
Executable File

#!/bin/bash
# Resolve all paths relative to the directory that contains this script.
SCRIPT_DIR=$(realpath "$0")
SCRIPT_DIR=$(dirname "$SCRIPT_DIR")
# Directory handed to the converter as input, and the converter script itself.
INPUT_DIR="${SCRIPT_DIR}/expected-structured-output"
PYTHON_SCRIPT="${SCRIPT_DIR}/../scripts/convert/elements_json_to_format.py"
# A non-empty first positional argument overrides the default output directory.
if [ -n "$1" ]; then
  OUTPUT_DIR=$1
else
  OUTPUT_DIR="${SCRIPT_DIR}/structured-output-markdown"
fi
# Switches read by process_json_files: a value of 1 adds the matching flag
# (--no-group / --exclude-img) to the converter command line.
NO_GROUP=1
EXCLUDE_IMG=0
#######################################
# Convert the JSON files under INPUT_DIR to markdown by running PYTHON_SCRIPT.
#
# Globals (read):
#   PYTHON_SCRIPT - path of the converter script passed to `python`
#   INPUT_DIR     - first positional argument given to the converter
#   OUTPUT_DIR    - value passed after --outdir
#   EXCLUDE_IMG   - when equal to 1, --exclude-img is appended
#   NO_GROUP      - when equal to 1, --no-group is appended
#   PYTHONPATH    - forwarded to python; "." is used when unset or empty
# Arguments: none
# Returns:   exit status of the python invocation
#######################################
process_json_files() {
  # Build argv as an array rather than a string fed to eval: every element is
  # passed to python verbatim, so paths containing spaces, quotes, `$` or
  # backticks are neither split nor re-interpreted by the shell.
  local -a args=(
    "$PYTHON_SCRIPT"
    "$INPUT_DIR"
    --outdir "$OUTPUT_DIR"
    --format markdown
  )

  # Add flags based on the variables
  if [ "$EXCLUDE_IMG" -eq 1 ]; then
    args+=(--exclude-img)
  fi
  if [ "$NO_GROUP" -eq 1 ]; then
    args+=(--no-group)
  fi

  # The assignment prefix sets PYTHONPATH for this one command only.
  PYTHONPATH="${PYTHONPATH:-.}" python "${args[@]}"
}
# Run the conversion once with the settings defined above; the function takes
# no arguments and reads its configuration from the script-level variables.
process_json_files