mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-07-28 03:12:54 +00:00

* Unify CI tests (from #2466) * Update Documentation & Code Style * Change folder names * Fix markers list * Remove marker 'slow', replaced with 'integration' * Soften children check * Start ES first so it has time to boot while Python is setup * Run the full workflow * Try to make pip upgrade on Windows * Set KG tests as integration * Update Documentation & Code Style * typo * faster pylint * Make Pylint use the cache * filter diff files for pylint * debug pylint statement * revert pylint changes * Remove path from asserted log (fails on Windows) * Skip preprocessor test on Windows * Tackling Windows specific failures * Fix pytest command for windows suites * Remove \ from command * Move poppler test into integration * Skip opensearch test on windows * Add tolerance in reader sas score for Windows * Another pytorch approx * Raise time limit for unit tests :( * Skip poppler test on Windows CI * Specify to pull with FF only in docs check * temporarily run the docs check immediately * Allow merge commit for now * Try without fetch depth * Accelerating test * Accelerating test * Add repository and ref alongside fetch-depth * Separate out code&docs check from tests * Use setup-python cache * Delete custom action * Remove the pull step in the docs check, will find a way to run on bot commits * Add requirements.txt in .github for caching * Actually install dependencies * Change deps group for pylint * Unclear why the requirements.txt is still required :/ * Fix the code check python setup * Install all deps for pylint * Make the autoformat check depend on tests and doc updates workflows * Try installing dependencies in another order * Try again to install the deps * quoting the paths * Ad back the requirements * Try again to install rest_api and ui * Change deps group * Duplicate haystack install line * See if the cache is the problem * Disable also in mypy, who knows * split the install step * Split install step everywhere * Revert "Separate out code&docs check from 
tests" This reverts commit 1cd59b15ffc5b984e1d642dcbf4c8ccc2bb6c9bd. * Add back the action * Proactive support for audio (see text2speech branch) * Fix label generator tests * Remove install of libsndfile1 on win temporarily * exclude audio tests on win * install ffmpeg for integration tests Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
63 lines
2.6 KiB
Python
63 lines
2.6 KiB
Python
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from haystack.nodes import Text2SparqlRetriever
|
|
from haystack.document_stores import GraphDBKnowledgeGraph
|
|
from haystack.utils import fetch_archive_from_http
|
|
|
|
|
|
@pytest.mark.graphdb
@pytest.mark.integration
def test_graph_retrieval():
    """End-to-end check of knowledge-graph question answering.

    Downloads a Harry Potter triple set and a pre-trained BART text-to-SPARQL
    model from S3, loads the triples into a GraphDB index, and verifies that
    ``Text2SparqlRetriever`` answers natural-language questions correctly.

    Requires a running GraphDB instance (``graphdb`` marker) and network
    access to fetch the archives — hence the ``integration`` marker.
    """
    # Directory that will hold the triples file and the GraphDB repo config.
    graph_dir = "../data/tutorial10_knowledge_graph/"
    s3_url = "https://fandom-qa.s3-eu-west-1.amazonaws.com/triples_and_config.zip"
    fetch_archive_from_http(url=s3_url, output_dir=graph_dir)

    # Fetch a pre-trained BART model that translates natural language questions to SPARQL queries
    model_dir = "../saved_models/tutorial10_knowledge_graph/"
    s3_url = "https://fandom-qa.s3-eu-west-1.amazonaws.com/saved_models/hp_v3.4.zip"
    fetch_archive_from_http(url=s3_url, output_dir=model_dir)

    kg = GraphDBKnowledgeGraph(index="tutorial_10_index")
    # Drop any index left over from a previous run so the import starts clean.
    kg.delete_index()
    kg.create_index(config_path=Path(graph_dir) / "repo-config.ttl")
    kg.import_from_ttl_file(index="tutorial_10_index", path=Path(graph_dir) / "triples.ttl")
    # One known triple from the dataset, used to sanity-check the import below.
    triple = {
        "p": {"type": "uri", "value": "https://deepset.ai/harry_potter/_paternalgrandfather"},
        "s": {"type": "uri", "value": "https://deepset.ai/harry_potter/Melody_fawley"},
        "o": {"type": "uri", "value": "https://deepset.ai/harry_potter/Marshall_fawley"},
    }
    triples = kg.get_all_triples()
    assert len(triples) > 0
    assert triple in triples

    # Define prefixes for names of resources so that we can use shorter resource names in queries
    prefixes = """PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX hp: <https://deepset.ai/harry_potter/>
    """
    kg.prefixes = prefixes

    kgqa_retriever = Text2SparqlRetriever(knowledge_graph=kg, model_name_or_path=model_dir + "hp_v3.4")

    # Full retrieve() path: question -> generated SPARQL -> answer.
    result = kgqa_retriever.retrieve(query="In which house is Harry Potter?")
    assert result[0] == {
        "answer": ["https://deepset.ai/harry_potter/Gryffindor"],
        "prediction_meta": {
            "model": "Text2SparqlRetriever",
            "sparql_query": "select ?a { hp:Harry_potter hp:house ?a . }",
        },
    }

    # Direct SPARQL execution (bypassing the model) with a prefixed query.
    result = kgqa_retriever._query_kg(
        sparql_query="select distinct ?sbj where { ?sbj hp:job hp:Keeper_of_keys_and_grounds . }"
    )
    assert result[0][0] == "https://deepset.ai/harry_potter/Rubeus_hagrid"

    # Direct SPARQL execution with fully-qualified URIs (no prefixes).
    result = kgqa_retriever._query_kg(
        sparql_query="select distinct ?obj where { <https://deepset.ai/harry_potter/Hermione_granger> <https://deepset.ai/harry_potter/patronus> ?obj . }"
    )
    assert result[0][0] == "https://deepset.ai/harry_potter/Otter"
|