haystack/test/test_knowledge_graph.py

50 lines
2.6 KiB
Python
Raw Normal View History

knowledge graph example (#934) * Add knowledge graph module * Fix type hint * Add graph retriver module * Change type annotations, change return format * Add graph retriever that executes questions as sparql queries * Linking only those entities that are in the knowledge graph * Added logging and using relations extracted from Knowledge graph for linking * Preventing entity linking from linking the same token to multiple entities * Pruning triples that have no variables for select and count queries * Support knowledge graphs with Pipelines * Add text2sparql * Entity linking and relation linking consider more special cases now based on evaluation on labelled data * Separating example code from KGQA implementation * Add eval on combined extarctive and kg questions * Remove references to hp-test * Add fields sparql_query and long_answer_list to metadata * Removing modular Question2SPARQL approach * Removing additional classes used for modular kgqa approach * preparing lcquad data * change graph db * Translating namespaces in knowledge graph queries * Creating graphdb index and loading triples from .ttl file * Fetching graph config files, triples and model from S3 * Fix incompatibility issues with BaseGraphRetriever and BaseComponent * Removing unused utility functions * Adding doc strings and tutorial header * Adding sparqlwrapper dependency * Moving tutorial header * Sorting tutorials by number within name of notebook * Add latest docstring and tutorial changes * Creating test cases for knowledge graph * Changing knowledge graph example to harry potter * Add latest docstring and tutorial changes * Adapting the tutorial notebook to harry potter example * Add GraphDB fixture for tests * Add latest docstring and tutorial changes * Added GraphDB docker launch to CI * Use correct GraphDB fixture * Check if GraphDB instance is already running * Renaming question/query and incorporating other feedback from Timo and Tanay * Removed type annotation * Add latest docstring and tutorial changes Co-authored-by: oryx1729 <oryx1729@protonmail.com> Co-authored-by: Timo Moeller <timo.moeller@deepset.ai> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2021-04-08 14:05:33 +02:00
from pathlib import Path
import pytest
from haystack.graph_retriever import Text2SparqlRetriever
from haystack.knowledge_graph.graphdb import GraphDBKnowledgeGraph
from haystack.preprocessor.utils import fetch_archive_from_http
@pytest.mark.graphdb
def test_graph_retrieval(graphdb_fixture):
# TODO rename doc_dir
graph_dir = "../data/tutorial10_knowledge_graph/"
s3_url = "https://fandom-qa.s3-eu-west-1.amazonaws.com/triples_and_config.zip"
fetch_archive_from_http(url=s3_url, output_dir=graph_dir)
# Fetch a pre-trained BART model that translates natural language questions to SPARQL queries
model_dir = "../saved_models/tutorial10_knowledge_graph/"
s3_url = "https://fandom-qa.s3-eu-west-1.amazonaws.com/saved_models/hp_v3.4.zip"
fetch_archive_from_http(url=s3_url, output_dir=model_dir)
kg = GraphDBKnowledgeGraph(index="tutorial_10_index")
kg.delete_index()
kg.create_index(config_path=Path(graph_dir+"repo-config.ttl"))
kg.import_from_ttl_file(index="tutorial_10_index",
path=Path(graph_dir+"triples.ttl"))
triple = {'p': {'type': 'uri', 'value': 'https://deepset.ai/harry_potter/_paternalgrandfather'}, 's': {'type': 'uri', 'value': 'https://deepset.ai/harry_potter/Melody_fawley'}, 'o': {'type': 'uri', 'value': 'https://deepset.ai/harry_potter/Marshall_fawley'}}
triples = kg.get_all_triples()
assert len(triples) > 0
assert triple in triples
# Define prefixes for names of resources so that we can use shorter resource names in queries
prefixes = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX hp: <https://deepset.ai/harry_potter/>
"""
kg.prefixes = prefixes
kgqa_retriever = Text2SparqlRetriever(knowledge_graph=kg,
model_name_or_path=model_dir+"hp_v3.4")
result = kgqa_retriever.retrieve(query="In which house is Harry Potter?")
assert result[0] == {'answer': ['https://deepset.ai/harry_potter/Gryffindor'], 'prediction_meta': {'model': 'Text2SparqlRetriever', 'sparql_query': 'select ?a { hp:Harry_potter hp:house ?a . }'}}
result = kgqa_retriever._query_kg(sparql_query="select distinct ?sbj where { ?sbj hp:job hp:Keeper_of_keys_and_grounds . }")
assert result[0][0] == "https://deepset.ai/harry_potter/Rubeus_hagrid"
result = kgqa_retriever._query_kg(
sparql_query="select distinct ?obj where { <https://deepset.ai/harry_potter/Hermione_granger> <https://deepset.ai/harry_potter/patronus> ?obj . }")
assert result[0][0] == "https://deepset.ai/harry_potter/Otter"