mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-19 06:38:04 +00:00

Co-authored-by: kbartlett <keith.bartlett@fullsight.org> Co-authored-by: Andrew Sikowitz <andrew.sikowitz@acryl.io> Co-authored-by: Jay Feldman <8128360+feldjay@users.noreply.github.com> Co-authored-by: Harshal Sheth <hsheth2@gmail.com> Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Co-authored-by: Shirshanka Das <shirshanka@apache.org> Co-authored-by: deepgarg-visa <149145061+deepgarg-visa@users.noreply.github.com> Co-authored-by: Felix Lüdin <13187726+Masterchen09@users.noreply.github.com>
222 lines
6.7 KiB
Python
222 lines
6.7 KiB
Python
import unittest
|
|
from pathlib import Path
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
|
|
from datahub.ingestion.api.common import PipelineContext
|
|
from datahub.ingestion.source.neo4j.neo4j_source import Neo4jConfig, Neo4jSource
|
|
|
|
|
|
@pytest.fixture
|
|
def tracking_uri(tmp_path: Path) -> str:
|
|
# return str(tmp_path / "neo4j")
|
|
return "neo4j+ssc://host:7687"
|
|
|
|
|
|
@pytest.fixture
|
|
def source(tracking_uri: str) -> Neo4jSource:
|
|
return Neo4jSource(
|
|
ctx=PipelineContext(run_id="neo4j-test"),
|
|
config=Neo4jConfig(
|
|
uri=tracking_uri, env="Prod", username="test", password="test"
|
|
),
|
|
)
|
|
|
|
|
|
def data():
|
|
return [
|
|
{
|
|
"key": "Node_1",
|
|
"value": {
|
|
"count": 433026,
|
|
"relationships": {
|
|
"RELATIONSHIP_1": {
|
|
"count": 1,
|
|
"properties": {
|
|
"Relationship1_Property1": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"array": False,
|
|
}
|
|
},
|
|
"direction": "in",
|
|
"labels": ["Node_2"],
|
|
}
|
|
},
|
|
"RELATIONSHIP_2": {
|
|
"count": 2,
|
|
"properties": {
|
|
"Relationship2_Property1": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"array": False,
|
|
}
|
|
},
|
|
"direction": "in",
|
|
"labels": ["Node_3"],
|
|
},
|
|
"type": "node",
|
|
"properties": {
|
|
"Node1_Property1": {
|
|
"existence": False,
|
|
"type": "DATE",
|
|
"indexed": False,
|
|
"unique": False,
|
|
},
|
|
"Node1_Property2": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"unique": False,
|
|
},
|
|
"Node1_Property3": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"unique": False,
|
|
},
|
|
},
|
|
"labels": [],
|
|
},
|
|
},
|
|
{
|
|
"key": "Node_2",
|
|
"value": {
|
|
"count": 3,
|
|
"relationships": {
|
|
"RELATIONSHIP_1": {
|
|
"count": 1,
|
|
"properties": {
|
|
"Relationship1_Property1": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"array": False,
|
|
}
|
|
},
|
|
"direction": "out",
|
|
"labels": ["Node_2"],
|
|
}
|
|
},
|
|
"type": "node",
|
|
"properties": {
|
|
"Node2_Property1": {
|
|
"existence": False,
|
|
"type": "DATE",
|
|
"indexed": False,
|
|
"unique": False,
|
|
},
|
|
"Node2_Property2": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"unique": False,
|
|
},
|
|
"Node2_Property3": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"unique": False,
|
|
},
|
|
},
|
|
"labels": [],
|
|
},
|
|
},
|
|
{
|
|
"key": "RELATIONSHIP_1",
|
|
"value": {
|
|
"count": 4,
|
|
"type": "relationship",
|
|
"properties": {
|
|
"Relationship1_Property1": {
|
|
"existence": False,
|
|
"type": "STRING",
|
|
"indexed": False,
|
|
"array": False,
|
|
}
|
|
},
|
|
},
|
|
},
|
|
]
|
|
|
|
|
|
def test_process_nodes(source):
|
|
df = source.process_nodes(data=data())
|
|
assert type(df) is pd.DataFrame
|
|
|
|
|
|
def test_process_relationships(source):
|
|
df = source.process_relationships(
|
|
data=data(), node_df=source.process_nodes(data=data())
|
|
)
|
|
assert type(df) is pd.DataFrame
|
|
|
|
|
|
def test_get_obj_type(source):
|
|
results = data()
|
|
assert source.get_obj_type(results[0]["value"]) == "node"
|
|
assert source.get_obj_type(results[1]["value"]) == "node"
|
|
assert source.get_obj_type(results[2]["value"]) == "relationship"
|
|
|
|
|
|
def test_get_node_description(source):
|
|
results = data()
|
|
df = source.process_nodes(data=data())
|
|
assert (
|
|
source.get_node_description(results[0], df)
|
|
== "(Node_1)<-[RELATIONSHIP_1]-(Node_2)"
|
|
)
|
|
assert (
|
|
source.get_node_description(results[1], df)
|
|
== "(Node_2)-[RELATIONSHIP_1]->(Node_2)"
|
|
)
|
|
|
|
|
|
def test_get_property_data_types(source):
|
|
results = data()
|
|
assert source.get_property_data_types(results[0]["value"]["properties"]) == [
|
|
{"Node1_Property1": "DATE"},
|
|
{"Node1_Property2": "STRING"},
|
|
{"Node1_Property3": "STRING"},
|
|
]
|
|
assert source.get_property_data_types(results[1]["value"]["properties"]) == [
|
|
{"Node2_Property1": "DATE"},
|
|
{"Node2_Property2": "STRING"},
|
|
{"Node2_Property3": "STRING"},
|
|
]
|
|
assert source.get_property_data_types(results[2]["value"]["properties"]) == [
|
|
{"Relationship1_Property1": "STRING"}
|
|
]
|
|
|
|
|
|
def test_get_properties(source):
|
|
results = data()
|
|
assert list(source.get_properties(results[0]["value"]).keys()) == [
|
|
"Node1_Property1",
|
|
"Node1_Property2",
|
|
"Node1_Property3",
|
|
]
|
|
assert list(source.get_properties(results[1]["value"]).keys()) == [
|
|
"Node2_Property1",
|
|
"Node2_Property2",
|
|
"Node2_Property3",
|
|
]
|
|
assert list(source.get_properties(results[2]["value"]).keys()) == [
|
|
"Relationship1_Property1"
|
|
]
|
|
|
|
|
|
def test_get_relationships(source):
|
|
results = data()
|
|
record = list(
|
|
results[0]["value"]["relationships"].keys()
|
|
) # Get the first key from the dict_keys
|
|
assert record == ["RELATIONSHIP_1"]
|
|
|
|
|
|
if __name__ == "__main__":
|
|
unittest.main()
|