From 2c54c6d06c364988c4b77d41c77cebbd5e0434e6 Mon Sep 17 00:00:00 2001 From: Jake Poznanski Date: Thu, 3 Jul 2025 16:43:51 +0000 Subject: [PATCH] Allow unicode in json --- olmocr/train/dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/olmocr/train/dataloader.py b/olmocr/train/dataloader.py index 66d93da..f19c84b 100644 --- a/olmocr/train/dataloader.py +++ b/olmocr/train/dataloader.py @@ -354,7 +354,7 @@ class JSONOutputFormat(PipelineStep): "is_table": page_data.is_table, "is_diagram": page_data.is_diagram, "natural_text": page_data.natural_text - }, ensure_ascii=True) + }, ensure_ascii=False) return sample