unstructured/test_unstructured_ingest/structured-json-to-html.sh
Marek Połom f333d7fe7f
feat: Json elements to HTML converter (#3936)
## NOTE
`test_unstructured_ingest/expected-structured-output-html` contains all
test HTML fixtures. Original JSON files, from which these HTML fixtures
are generated, were taken from
`test_unstructured_ingest/expected-structured-output`
2025-03-04 13:57:35 +00:00

27 lines
758 B
Bash
Executable File

#!/bin/bash
#
# Runs scripts/html/elements_json_to_html.py over the
# "expected-structured-output" directory that sits next to this script.
#
# Usage: structured-json-to-html.sh [OUTPUT_DIR]
#   OUTPUT_DIR  optional destination directory; when omitted or empty, the
#               "structured-output-html" directory beside this script is used.

# Converter switches read by process_json_files: a value of 1 appends the
# matching command-line flag (--no-group / --exclude-img).
NO_GROUP=1
EXCLUDE_IMG=0

# Directory containing this script (symlinks resolved by realpath); all other
# paths below are anchored on it.
SCRIPT_DIR="$(dirname "$(realpath "$0")")"

# Input tree, converter script, and output directory.
INPUT_DIR="${SCRIPT_DIR}/expected-structured-output"
PYTHON_SCRIPT="${SCRIPT_DIR}/../scripts/html/elements_json_to_html.py"
OUTPUT_DIR="${1:-${SCRIPT_DIR}/structured-output-html}"
#######################################
# Run the JSON-elements-to-HTML converter script once over INPUT_DIR.
#
# The argument list is assembled as an array and executed directly (no eval),
# so values containing spaces, quotes, `$`, or backticks reach python verbatim
# instead of being re-parsed by the shell.
#
# Globals:
#   PYTHON_SCRIPT (read) - converter script handed to python
#   INPUT_DIR     (read) - first positional argument of the converter
#   OUTPUT_DIR    (read) - value passed to --outdir
#   EXCLUDE_IMG   (read) - 1 appends --exclude-img
#   NO_GROUP      (read) - 1 appends --no-group
#   PYTHONPATH    (read) - forwarded unchanged; "." when unset or empty
# Arguments:
#   None
# Returns:
#   Exit status of the python invocation.
#######################################
process_json_files() {
  local -a converter_args=("$PYTHON_SCRIPT" "$INPUT_DIR" --outdir "$OUTPUT_DIR")

  # Optional flags are appended only when their switch equals 1.
  if [ "${EXCLUDE_IMG:-0}" -eq 1 ]; then
    converter_args+=(--exclude-img)
  fi
  if [ "${NO_GROUP:-0}" -eq 1 ]; then
    converter_args+=(--no-group)
  fi

  # The assignment prefix scopes PYTHONPATH to this single command.
  PYTHONPATH="${PYTHONPATH:-.}" python "${converter_args[@]}"
}
# Entry point: run the converter once with the paths and flags set at the top of this script.
process_json_files