diff --git a/scripts/user/u-tables-inspect.sh b/scripts/user/u-tables-inspect.sh new file mode 100755 index 000000000..953009492 --- /dev/null +++ b/scripts/user/u-tables-inspect.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash + +# TODO: +# * include the page number in output html filename, maybe all tables per page +# in one html page. + +set -e + +USAGE_MESSAGE="Usage: $0 + +Requires an unstructured output .json file as the only argument. + +Each table in the file is saved as an individual html file and +opened in Safari (if running on mac), providing a quick and easy +way to see the structure and content of each Table element. +" + +# Check for the presence of an argument +if [ "$#" -ne 1 ]; then + echo "$USAGE_MESSAGE" + exit 1 +fi + +# The JSON file to be processed is the first argument +JSON_FILE="$1" + +# Extract the basename without the extension +BASE_NAME=$(basename "$JSON_FILE" .json) + +# Directory where the files will be saved +TMP_TABLES_OUTPUTS_DIR="$HOME/tmp/tables-out" + +# Check if the directory exists, if not create it +if [ ! -d "$TMP_TABLES_OUTPUTS_DIR" ]; then + mkdir -p "$TMP_TABLES_OUTPUTS_DIR" +fi + +# Counter for the table files +COUNTER=1 + +# Parsing the JSON and creating HTML files +jq -c '.[] | select(.type == "Table") | .metadata.text_as_html' "$JSON_FILE" | while read -r HTML_CONTENT; do + # Remove leading and trailing quotes from the JSON string + HTML_CONTENT=${HTML_CONTENT#\"} + HTML_CONTENT=${HTML_CONTENT%\"} + # add a border and padding to clearly see cell definition + # shellcheck disable=SC2001 + HTML_CONTENT=$(echo "$HTML_CONTENT" | sed 's//
/') + # add newlines for readability in the html + # shellcheck disable=SC2001 + HTML_CONTENT=$(echo "$HTML_CONTENT" | sed 's/>\s*\n "$HTML_FILENAME" + + if [ "$(uname)" == "Darwin" ]; then + # Open the file in a new browser window + open -a "Safari" "$HTML_FILENAME"& + else + echo "$HTML_FILENAME" + fi +done