diff --git a/haystack/preprocessor/utils.py b/haystack/preprocessor/utils.py index 26bb725de..580936a58 100644 --- a/haystack/preprocessor/utils.py +++ b/haystack/preprocessor/utils.py @@ -32,11 +32,13 @@ def eval_data_from_file(filename: str) -> Tuple[List[Document], List[Label]]: with open(filename, "r") as file: data = json.load(file) + if "title" not in data["data"][0]: + logger.warning(f"No title information found for documents in QA file: {filename}") for document in data["data"]: # get all extra fields from document level (e.g. title) meta_doc = {k: v for k, v in document.items() if k not in ("paragraphs", "title")} for paragraph in document["paragraphs"]: - cur_meta = {"name": document["title"]} + cur_meta = {"name": document.get("title", None)} # all other fields from paragraph level meta_paragraph = {k: v for k, v in paragraph.items() if k not in ("qas", "context")} cur_meta.update(meta_paragraph)