Make title info optional when evaluating on QA data (#494)

* Add a check for whether a title is present in the QA file and make title extraction optional

* Set the document name to None when the title is missing
This commit is contained in:
Timo Moeller 2020-10-23 11:06:56 +02:00 committed by GitHub
parent f62117c232
commit def8fd617a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -32,11 +32,13 @@ def eval_data_from_file(filename: str) -> Tuple[List[Document], List[Label]]:
with open(filename, "r") as file:
data = json.load(file)
if "title" not in data["data"][0]:
logger.warning(f"No title information found for documents in QA file: {filename}")
for document in data["data"]:
# get all extra fields from document level (e.g. title)
meta_doc = {k: v for k, v in document.items() if k not in ("paragraphs", "title")}
for paragraph in document["paragraphs"]:
cur_meta = {"name": document["title"]}
cur_meta = {"name": document.get("title", None)}
# all other fields from paragraph level
meta_paragraph = {k: v for k, v in paragraph.items() if k not in ("qas", "context")}
cur_meta.update(meta_paragraph)