mirror of
https://github.com/Unstructured-IO/unstructured.git
synced 2025-06-27 02:30:08 +00:00
build: element type frequency evaluation metrics workflow in ci (#1862)
**Executive Summary** Measures element type frequency accuracy of the current version of the code against the expected output. The performance is reported as a tsv file under `metrics`. **Technical Details** - The evaluation measures element type frequencies from `structured-output-eval` against `expected-structured-output` - `evaluation.py` has been edited to support function calling using `click.group()` and `command()` - `evaluation-ingest-cp.sh` is now added to all the `test-ingest-xx.sh` scripts **Outputs** Two tsv files are saved and the aggregated score is displayed.  --------- Co-authored-by: ryannikolaidis <1208590+ryannikolaidis@users.noreply.github.com> Co-authored-by: Klaijan <Klaijan@users.noreply.github.com> Co-authored-by: Yao You <theyaoyou@gmail.com>
This commit is contained in:
parent
f273a7cb83
commit
466255eec3
@ -2,6 +2,8 @@
|
||||
|
||||
### Enhancements
|
||||
|
||||
* **Add element type CI evaluation workflow** Adds element type frequency evaluation metrics to the current ingest workflow to measure the performance of each file extracted as well as aggregated-level performance.
|
||||
|
||||
### Features
|
||||
|
||||
### Fixes
|
||||
|
@ -17,7 +17,7 @@ selected_outputs=$(cat "$SCRIPT_DIR/metrics/metrics-json-manifest.txt")
|
||||
# If structured output file in this connector's outputs match the
|
||||
# selected outputs in the txt file, copy to the destination
|
||||
for file in "${structured_outputs[@]}"; do
|
||||
if [[ "${selected_outputs[*]}" =~ $(basename "$file") ]] ; then
|
||||
if [[ -f "$file" && "${selected_outputs[*]}" =~ $(basename "$file") ]] ; then
|
||||
echo "--- Copying $file to $CP_DIR ---"
|
||||
cp "$file" "$CP_DIR"
|
||||
fi
|
||||
|
@ -9,23 +9,36 @@ cd "$SCRIPT_DIR"/.. || exit 1
|
||||
OUTPUT_DIR=$SCRIPT_DIR/structured-output-eval
|
||||
mkdir -p "$OUTPUT_DIR"
|
||||
|
||||
EVAL_NAME="$1"
|
||||
|
||||
# Download cct test from s3
|
||||
BUCKET_NAME=utic-dev-tech-fixtures
|
||||
FOLDER_NAME=small-cct
|
||||
CCT_DIR=$SCRIPT_DIR/gold-standard/$FOLDER_NAME
|
||||
mkdir -p "$CCT_DIR"
|
||||
aws s3 cp "s3://$BUCKET_NAME/$FOLDER_NAME" "$CCT_DIR" --recursive --no-sign-request --region us-east-2
|
||||
FOLDER_NAME=small-eval-"$EVAL_NAME"
|
||||
LOCAL_EVAL_SOURCE_DIR=$SCRIPT_DIR/gold-standard/$FOLDER_NAME
|
||||
mkdir -p "$LOCAL_EVAL_SOURCE_DIR"
|
||||
aws s3 cp "s3://$BUCKET_NAME/$FOLDER_NAME" "$LOCAL_EVAL_SOURCE_DIR" --recursive --no-sign-request --region us-east-2
|
||||
|
||||
EXPORT_DIR="$SCRIPT_DIR"/metrics
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source "$SCRIPT_DIR"/cleanup.sh
|
||||
function cleanup() {
|
||||
cleanup_dir "$OUTPUT_DIR"
|
||||
cleanup_dir "$CCT_DIR"
|
||||
cleanup_dir "$LOCAL_EVAL_SOURCE_DIR"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
EXPORT_DIR="$SCRIPT_DIR"/metrics
|
||||
if [ "$EVAL_NAME" == "text-extraction" ]; then
|
||||
STRATEGY="measure-text-edit-distance"
|
||||
elif [ "$EVAL_NAME" == "element-type" ]; then
|
||||
STRATEGY="measure-element-type-accuracy"
|
||||
else
|
||||
echo "Wrong evaluation strategy given. Got [ $EVAL_NAME ]."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
PYTHONPATH=. ./unstructured/ingest/evaluate.py \
|
||||
$STRATEGY \
|
||||
--output_dir "$OUTPUT_DIR" \
|
||||
--source_dir "$CCT_DIR" \
|
||||
--export_dir "$EXPORT_DIR"
|
||||
--source_dir "$LOCAL_EVAL_SOURCE_DIR" \
|
||||
--export_dir "$EXPORT_DIR"
|
@ -1,3 +1,3 @@
|
||||
strategy average sample_sd population_sd count
|
||||
cct-accuracy 0.774 0.124 0.087 2
|
||||
cct-%missing 0.065 0.035 0.025 2
|
||||
cct-accuracy 0.777 0.088 0.072 3
|
||||
cct-%missing 0.087 0.045 0.037 3
|
||||
|
|
@ -0,0 +1,2 @@
|
||||
strategy average sample_sd population_sd count
|
||||
element-type-accuracy 0
|
|
@ -1,3 +1,4 @@
|
||||
filename connector cct-accuracy cct-%missing
|
||||
IRS-form-1987.pdf azure 0.783 0.13
|
||||
example-10k.html local 0.686 0.04
|
||||
science-exploration-1p.pptx box 0.861 0.09
|
||||
|
|
@ -0,0 +1 @@
|
||||
filename connector element-type-accuracy
|
|
@ -37,3 +37,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-num-files-output.sh 1 $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -48,3 +48,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--verbose
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -33,3 +33,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -39,3 +39,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -37,3 +37,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -45,3 +45,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -49,3 +49,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
python "$SCRIPT_DIR"/python/test-ingest-delta-table-output.py --table-uri "$DESTINATION_TABLE"
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -41,3 +41,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -48,3 +48,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -34,3 +34,5 @@ set +e
|
||||
# once we have an alternative encoder that is deterministic, we test the diff here
|
||||
# until then just validating the file was created
|
||||
"$SCRIPT_DIR"/check-num-files-output.sh 1 "$OUTPUT_FOLDER_NAME"
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -47,3 +47,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -52,3 +52,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
$ACCESS_TOKEN_FLAGS
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -38,3 +38,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-num-files-output.sh 2 $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -48,3 +48,5 @@ PYTHONPATH=. unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -68,3 +68,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -31,3 +31,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
set +e
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -33,3 +33,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
set +e
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -32,3 +32,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
set +e
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -42,3 +42,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -46,3 +46,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -46,3 +46,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -42,3 +42,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -35,3 +35,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--uncompress
|
||||
|
||||
"$SCRIPT_DIR"/check-num-files-output.sh 12 $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -47,3 +47,5 @@ AWS_SECRET_ACCESS_KEY=$secret_key AWS_ACCESS_KEY_ID=$access_key PYTHONPATH=. ./u
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -37,3 +37,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -58,3 +58,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -54,3 +54,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -44,3 +44,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-diff-expected-output.sh $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -35,3 +35,5 @@ PYTHONPATH=. ./unstructured/ingest/main.py \
|
||||
--work-dir "$WORK_DIR"
|
||||
|
||||
"$SCRIPT_DIR"/check-num-files-output.sh 3 $OUTPUT_FOLDER_NAME
|
||||
|
||||
"$SCRIPT_DIR"/evaluation-ingest-cp.sh "$OUTPUT_DIR" "$OUTPUT_FOLDER_NAME"
|
||||
|
@ -97,6 +97,13 @@ for test in "${all_tests[@]}"; do
|
||||
fi
|
||||
done
|
||||
|
||||
echo "--------- RUNNING SCRIPT evaluation-metrics.sh ---------"
|
||||
./test_unstructured_ingest/evaluation-metrics.sh
|
||||
echo "--------- FINISHED SCRIPT evaluation-metrics.sh ---------"
|
||||
all_eval=(
|
||||
'text-extraction'
|
||||
'element-type'
|
||||
)
|
||||
for eval in "${all_eval[@]}"; do
|
||||
CURRENT_TEST="$eval"
|
||||
echo "--------- RUNNING SCRIPT $eval ---------"
|
||||
./test_unstructured_ingest/evaluation-metrics.sh "$eval"
|
||||
echo "--------- FINISHED SCRIPT $eval ---------"
|
||||
done
|
@ -8,6 +8,10 @@ from typing import Any, List, Optional, Tuple
|
||||
|
||||
import click
|
||||
|
||||
from unstructured.metrics.element_type import (
|
||||
calculate_element_type_percent_match,
|
||||
get_element_type_frequency,
|
||||
)
|
||||
from unstructured.metrics.text_extraction import calculate_accuracy, calculate_percent_missing_text
|
||||
from unstructured.staging.base import elements_from_json, elements_to_text
|
||||
|
||||
@ -24,7 +28,12 @@ if "ingest_log_handler" not in [h.name for h in logger.handlers]:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.group()
|
||||
def main():
|
||||
pass
|
||||
|
||||
|
||||
@main.command()
|
||||
@click.option("--output_dir", type=click.STRING, help="Directory to a structured output.")
|
||||
@click.option(
|
||||
"--output_list",
|
||||
@ -56,7 +65,7 @@ logger.setLevel(logging.DEBUG)
|
||||
help="A tuple of weights to the Levenshtein distance calculation. \
|
||||
See text_extraction.py/calculate_edit_distance for more details.",
|
||||
)
|
||||
def measure_edit_distance(
|
||||
def measure_text_edit_distance(
|
||||
output_dir: str,
|
||||
output_list: Optional[List[str]],
|
||||
source_dir: str,
|
||||
@ -123,6 +132,74 @@ def measure_edit_distance(
|
||||
_display(agg_rows, headers)
|
||||
|
||||
|
||||
@main.command()
@click.option("--output_dir", type=click.STRING, help="Directory to a structured output.")
@click.option(
    "--output_list",
    type=click.STRING,
    multiple=True,
    help="Optional: list of selected structured output file names under the \
directory to be evaluate. If none, all files under directory will be use.",
)
@click.option("--source_dir", type=click.STRING, help="Directory to a structured source.")
@click.option(
    "--source_list",
    type=click.STRING,
    multiple=True,
    help="Optional: list of selected structured source file names under the directory \
to be evaluate. If none, all files under directory will be use.",
)
@click.option(
    "--export_dir",
    type=click.STRING,
    default="metrics",
    help="Directory to save the output evaluation metrics to. Default to \
[your_working_dir]/metrics/",
)
def measure_element_type_accuracy(
    output_dir: str,
    output_list: Optional[List[str]],
    source_dir: str,
    source_list: Optional[List[str]],
    export_dir: str,
):
    """Score element-type frequencies of structured outputs against source files.

    For every output document whose ``<name>.json`` also appears in the source
    list, the element-type frequency of the output is compared with that of the
    source and the rounded percent match is recorded. Two TSV files are written
    to ``export_dir``: ``all-docs-element-type.tsv`` (one row per scored
    document) and ``aggregate-scores-element-type.tsv`` (mean, sample/population
    standard deviation and count). The aggregate row is also displayed.

    Args:
        output_dir: Directory holding the structured outputs to evaluate.
        output_list: Paths relative to ``output_dir``; when empty, every file
            found by ``_listdir_recursive(output_dir)`` is used.
        source_dir: Directory holding the reference (source) files.
        source_list: File names under ``source_dir``; when empty, every file
            found by ``_listdir_recursive(source_dir)`` is used.
        export_dir: Directory the TSV reports are written to.
    """
    if not output_list:
        output_list = _listdir_recursive(output_dir)
    if not source_list:
        source_list = _listdir_recursive(source_dir)

    # Membership is tested once per output document, so build the lookup once
    # instead of re-scanning the list on every iteration.
    source_names = set(source_list)  # type: ignore

    rows = []
    accuracy_scores: List[float] = []

    for doc in output_list:  # type: ignore
        # "<connector>/.../<filename>.json" -> "<filename>"
        fn = (doc.split("/")[-1]).split(".json")[0]
        fn_json = fn + ".json"
        # The first path component is reported as the connector name.
        connector = doc.split("/")[0]

        # Outputs without a matching source file cannot be scored; skip them.
        if fn_json not in source_names:
            continue

        output = get_element_type_frequency(_read_text(os.path.join(output_dir, doc)))
        source = get_element_type_frequency(_read_text(os.path.join(source_dir, fn_json)))
        accuracy = round(calculate_element_type_percent_match(output, source), 3)
        rows.append([fn, connector, accuracy])
        accuracy_scores.append(accuracy)

    # Per-document report.
    headers = ["filename", "connector", "element-type-accuracy"]
    _write_to_file(export_dir, "all-docs-element-type.tsv", rows, headers)

    # Aggregate report: a single row summarising all scored documents.
    headers = ["strategy", "average", "sample_sd", "population_sd", "count"]
    agg_rows = [
        [
            "element-type-accuracy",
            _mean(accuracy_scores),
            _stdev(accuracy_scores),
            _pstdev(accuracy_scores),
            len(accuracy_scores),
        ],
    ]
    _write_to_file(export_dir, "aggregate-scores-element-type.tsv", agg_rows, headers)
    _display(agg_rows, headers)
|
||||
|
||||
|
||||
def _listdir_recursive(dir: str):
|
||||
listdir = []
|
||||
for dirpath, _, filenames in os.walk(dir):
|
||||
@ -164,9 +241,15 @@ def _display(rows, headers):
|
||||
|
||||
|
||||
def _mean(scores: List[float], rounding: Optional[int] = 3):
|
||||
if len(scores) < 1:
|
||||
return None
|
||||
elif len(scores) == 1:
|
||||
mean = scores[0]
|
||||
else:
|
||||
mean = statistics.mean(scores)
|
||||
if not rounding:
|
||||
return statistics.mean(scores)
|
||||
return round(statistics.mean(scores), rounding)
|
||||
return mean
|
||||
return round(mean, rounding)
|
||||
|
||||
|
||||
def _stdev(scores: List[float], rounding: Optional[int] = 3):
|
||||
@ -185,5 +268,11 @@ def _pstdev(scores: List[float], rounding: Optional[int] = 3):
|
||||
return round(statistics.pstdev(scores), rounding)
|
||||
|
||||
|
||||
def _read_text(path):
    """Return the entire contents of the text file at ``path`` as a string.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    locale encoding, so the result does not depend on the machine the
    evaluation runs on. The callers in this file pass paths to ``.json``
    files, which are expected to be UTF-8.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file contents.
    """
    with open(path, encoding="utf-8") as f:
        return f.read()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
measure_edit_distance()
|
||||
main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user