mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-28 20:09:59 +00:00
feat(ingest/dbt-cloud): reduce graphql query complexity (#8390)
This commit is contained in:
parent
ea3e119210
commit
ae7e3c2080
@ -104,23 +104,20 @@ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS = """
|
|||||||
compiledCode
|
compiledCode
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_DBT_GRAPHQL_QUERY = f"""
|
_DBT_FIELDS_BY_TYPE = {
|
||||||
query DatahubMetadataQuery($jobId: Int!, $runId: Int) {{
|
"models": f"""
|
||||||
models(jobId: $jobId, runId: $runId) {{
|
|
||||||
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
|
{ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
|
||||||
dependsOn
|
dependsOn
|
||||||
materializedType
|
materializedType
|
||||||
}}
|
""",
|
||||||
|
"seeds": f"""
|
||||||
seeds(jobId: $jobId, runId: $runId) {{
|
|
||||||
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
|
{ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
|
||||||
}}
|
""",
|
||||||
|
"sources": f"""
|
||||||
sources(jobId: $jobId, runId: $runId) {{
|
|
||||||
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
||||||
identifier
|
identifier
|
||||||
@ -131,9 +128,8 @@ query DatahubMetadataQuery($jobId: Int!, $runId: Int) {{
|
|||||||
state
|
state
|
||||||
freshnessChecked
|
freshnessChecked
|
||||||
loader
|
loader
|
||||||
}}
|
""",
|
||||||
|
"snapshots": f"""
|
||||||
snapshots(jobId: $jobId, runId: $runId) {{
|
|
||||||
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_NODE_COMMON_FIELDS }
|
||||||
{ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
|
{ _DBT_GRAPHQL_MODEL_SEED_SNAPSHOT_FIELDS }
|
||||||
@ -143,9 +139,8 @@ query DatahubMetadataQuery($jobId: Int!, $runId: Int) {{
|
|||||||
parentsModels {{
|
parentsModels {{
|
||||||
uniqueId
|
uniqueId
|
||||||
}}
|
}}
|
||||||
}}
|
""",
|
||||||
|
"tests": f"""
|
||||||
tests(jobId: $jobId, runId: $runId) {{
|
|
||||||
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
{ _DBT_GRAPHQL_COMMON_FIELDS }
|
||||||
state
|
state
|
||||||
columnName
|
columnName
|
||||||
@ -159,12 +154,18 @@ query DatahubMetadataQuery($jobId: Int!, $runId: Int) {{
|
|||||||
rawCode
|
rawCode
|
||||||
compiledSql
|
compiledSql
|
||||||
compiledCode
|
compiledCode
|
||||||
}}
|
""",
|
||||||
|
# Currently unsupported dbt node types:
|
||||||
|
# - metrics
|
||||||
|
# - snapshots
|
||||||
|
# - exposures
|
||||||
|
}
|
||||||
|
|
||||||
# Currently unsupported dbt node types:
|
_DBT_GRAPHQL_QUERY = """
|
||||||
# - metrics
|
query DatahubMetadataQuery_{type}($jobId: Int!, $runId: Int) {{
|
||||||
# - snapshots
|
{type}(jobId: $jobId, runId: $runId) {{
|
||||||
# - exposures
|
{fields}
|
||||||
|
}}
|
||||||
}}
|
}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -206,15 +207,36 @@ class DBTCloudSource(DBTSourceBase):
|
|||||||
# Additionally, we'd like to model dbt Cloud jobs/runs in DataHub
|
# Additionally, we'd like to model dbt Cloud jobs/runs in DataHub
|
||||||
# as DataProcesses or DataJobs.
|
# as DataProcesses or DataJobs.
|
||||||
|
|
||||||
logger.debug("Sending graphql request to the dbt Cloud metadata API")
|
raw_nodes = []
|
||||||
response = requests.post(
|
for node_type, fields in _DBT_FIELDS_BY_TYPE.items():
|
||||||
self.config.metadata_endpoint,
|
logger.info(f"Fetching {node_type} from dbt Cloud")
|
||||||
json={
|
data = self._send_graphql_query(
|
||||||
"query": _DBT_GRAPHQL_QUERY,
|
query=_DBT_GRAPHQL_QUERY.format(type=node_type, fields=fields),
|
||||||
"variables": {
|
variables={
|
||||||
"jobId": self.config.job_id,
|
"jobId": self.config.job_id,
|
||||||
"runId": self.config.run_id,
|
"runId": self.config.run_id,
|
||||||
},
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
raw_nodes.extend(data[node_type])
|
||||||
|
|
||||||
|
nodes = [self._parse_into_dbt_node(node) for node in raw_nodes]
|
||||||
|
|
||||||
|
additional_metadata: Dict[str, Optional[str]] = {
|
||||||
|
"project_id": str(self.config.project_id),
|
||||||
|
"account_id": str(self.config.account_id),
|
||||||
|
"job_id": str(self.config.job_id),
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodes, additional_metadata
|
||||||
|
|
||||||
|
def _send_graphql_query(self, query: str, variables: Dict) -> Dict:
|
||||||
|
logger.debug(f"Sending GraphQL query to dbt Cloud: {query}")
|
||||||
|
response = requests.post(
|
||||||
|
self.config.metadata_endpoint,
|
||||||
|
json={
|
||||||
|
"query": query,
|
||||||
|
"variables": variables,
|
||||||
},
|
},
|
||||||
headers={
|
headers={
|
||||||
"Authorization": f"Bearer {self.config.token}",
|
"Authorization": f"Bearer {self.config.token}",
|
||||||
@ -233,23 +255,7 @@ class DBTCloudSource(DBTSourceBase):
|
|||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
raw_nodes = [
|
return data
|
||||||
*data["models"],
|
|
||||||
*data["seeds"],
|
|
||||||
*data["sources"],
|
|
||||||
*data["snapshots"],
|
|
||||||
*data["tests"],
|
|
||||||
]
|
|
||||||
|
|
||||||
nodes = [self._parse_into_dbt_node(node) for node in raw_nodes]
|
|
||||||
|
|
||||||
additional_metadata: Dict[str, Optional[str]] = {
|
|
||||||
"project_id": str(self.config.project_id),
|
|
||||||
"account_id": str(self.config.account_id),
|
|
||||||
"job_id": str(self.config.job_id),
|
|
||||||
}
|
|
||||||
|
|
||||||
return nodes, additional_metadata
|
|
||||||
|
|
||||||
def _parse_into_dbt_node(self, node: Dict) -> DBTNode:
|
def _parse_into_dbt_node(self, node: Dict) -> DBTNode:
|
||||||
key = node["uniqueId"]
|
key = node["uniqueId"]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user