mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-25 17:37:57 +00:00
Added pagination Tableau data sources graphql (#12187)
* Added pagination tableau graphql * changed downstream workbook
This commit is contained in:
parent
df7f5a7309
commit
acf25f4555
@ -18,6 +18,7 @@ source:
|
|||||||
siteName: site_name
|
siteName: site_name
|
||||||
siteUrl: site_url
|
siteUrl: site_url
|
||||||
apiVersion: api_version
|
apiVersion: api_version
|
||||||
|
paginationLimit: 10
|
||||||
sourceConfig:
|
sourceConfig:
|
||||||
config:
|
config:
|
||||||
type: DashboardMetadata
|
type: DashboardMetadata
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
"""
|
"""
|
||||||
Wrapper module of TableauServerConnection client
|
Wrapper module of TableauServerConnection client
|
||||||
"""
|
"""
|
||||||
|
import math
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Any, Callable, Dict, List, Optional
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
|
|
||||||
@ -23,9 +24,11 @@ from metadata.ingestion.source.dashboard.tableau import (
|
|||||||
TABLEAU_GET_WORKBOOKS_PARAM_DICT,
|
TABLEAU_GET_WORKBOOKS_PARAM_DICT,
|
||||||
)
|
)
|
||||||
from metadata.ingestion.source.dashboard.tableau.models import (
|
from metadata.ingestion.source.dashboard.tableau.models import (
|
||||||
|
DataSource,
|
||||||
TableauChart,
|
TableauChart,
|
||||||
TableauDashboard,
|
TableauDashboard,
|
||||||
TableauDatasources,
|
TableauDatasources,
|
||||||
|
TableauDatasourcesConnection,
|
||||||
TableauOwner,
|
TableauOwner,
|
||||||
)
|
)
|
||||||
from metadata.ingestion.source.dashboard.tableau.queries import (
|
from metadata.ingestion.source.dashboard.tableau.queries import (
|
||||||
@ -49,7 +52,13 @@ class TableauClient:
|
|||||||
|
|
||||||
_client: TableauServerConnection
|
_client: TableauServerConnection
|
||||||
|
|
||||||
def __init__(self, config: Dict[str, Dict[str, Any]], env: str, ssl_verify: bool):
|
def __init__(
|
||||||
|
self,
|
||||||
|
config: Dict[str, Dict[str, Any]],
|
||||||
|
env: str,
|
||||||
|
ssl_verify: bool,
|
||||||
|
pagination_limit: int,
|
||||||
|
):
|
||||||
# ssl_verify is typed as a `bool` in TableauServerConnection
|
# ssl_verify is typed as a `bool` in TableauServerConnection
|
||||||
# However, it is passed as `verify=self.ssl_verify` in each `requests` call.
|
# However, it is passed as `verify=self.ssl_verify` in each `requests` call.
|
||||||
# In requests (https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification)
|
# In requests (https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification)
|
||||||
@ -60,6 +69,7 @@ class TableauClient:
|
|||||||
ssl_verify=ssl_verify,
|
ssl_verify=ssl_verify,
|
||||||
)
|
)
|
||||||
self._client.sign_in().json()
|
self._client.sign_in().json()
|
||||||
|
self.pagination_limit = pagination_limit
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def server_info(self) -> Callable:
|
def server_info(self) -> Callable:
|
||||||
@ -106,15 +116,25 @@ class TableauClient:
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_datasources(self):
|
def _query_datasources(
|
||||||
|
self, entities_per_page: int, offset: int
|
||||||
|
) -> Optional[TableauDatasources]:
|
||||||
|
"""
|
||||||
|
Method to query the graphql endpoint to get data sources
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
datasources_graphql_result = self._client.metadata_graphql_query(
|
datasources_graphql_result = self._client.metadata_graphql_query(
|
||||||
query=TABLEAU_DATASOURCES_QUERY
|
query=TABLEAU_DATASOURCES_QUERY.format(
|
||||||
|
first=entities_per_page, offset=offset
|
||||||
|
)
|
||||||
)
|
)
|
||||||
if datasources_graphql_result:
|
if datasources_graphql_result:
|
||||||
resp = datasources_graphql_result.json()
|
resp = datasources_graphql_result.json()
|
||||||
if resp and resp.get("data"):
|
if resp and resp.get("data"):
|
||||||
return TableauDatasources(**resp.get("data"))
|
tableau_datasource_connection = TableauDatasourcesConnection(
|
||||||
|
**resp.get("data")
|
||||||
|
)
|
||||||
|
return tableau_datasource_connection.embeddedDatasourcesConnection
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
logger.warning(
|
logger.warning(
|
||||||
@ -124,7 +144,32 @@ class TableauClient:
|
|||||||
"https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html"
|
"https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html"
|
||||||
"#enable-the-tableau-metadata-api-for-tableau-server\n"
|
"#enable-the-tableau-metadata-api-for-tableau-server\n"
|
||||||
)
|
)
|
||||||
return TableauDatasources(embeddedDatasources=[])
|
return None
|
||||||
|
|
||||||
|
def get_datasources(self) -> Optional[List["DataSource"]]:
    """
    Paginate and get the list of all data sources

    A first single-entity query is used only to read ``totalCount``; the
    data sources are then fetched page by page via ``_query_datasources``.

    Returns:
        The data sources gathered from every page that could be fetched
        (possibly an empty list), or None when the total count could not
        be obtained or an unexpected error occurred.
    """
    # Upper bound applied to the configured page size
    max_entities_per_page = 50
    try:
        # Query the graphql endpoint once to get total count of data sources.
        # Offset 0 so the probe does not skip any entity.
        probe = self._query_datasources(entities_per_page=1, offset=0)
        if probe is None or probe.totalCount is None:
            # Without a total count there is nothing to paginate over
            logger.warning("Unable to fetch Data Sources")
            return None

        # Keep the page size within [1, 50]: a limit of 0 would divide by
        # zero and a negative one would silently produce no pages at all
        entities_per_page = max(
            1, min(max_entities_per_page, self.pagination_limit)
        )
        total_pages = math.ceil(probe.totalCount / entities_per_page)

        # Paginate the results
        data_sources = []
        for page_number in range(total_pages):
            page = self._query_datasources(
                entities_per_page=entities_per_page,
                offset=page_number * entities_per_page,
            )
            # The query may return None and `nodes` is optional; skip such
            # pages instead of losing the ones already collected
            if page and page.nodes:
                data_sources.extend(page.nodes)
        return data_sources
    except Exception:
        logger.debug(traceback.format_exc())
        logger.warning("Unable to fetch Data Sources")
    return None
|
||||||
|
|
||||||
def sign_out(self) -> None:
|
def sign_out(self) -> None:
|
||||||
self._client.sign_out()
|
self._client.sign_out()
|
||||||
|
@ -55,6 +55,7 @@ def get_connection(connection: TableauConnection) -> TableauClient:
|
|||||||
config=tableau_server_config,
|
config=tableau_server_config,
|
||||||
env=connection.env,
|
env=connection.env,
|
||||||
ssl_verify=get_verify_ssl(connection.sslConfig),
|
ssl_verify=get_verify_ssl(connection.sslConfig),
|
||||||
|
pagination_limit=connection.paginationLimit,
|
||||||
)
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug(traceback.format_exc())
|
logger.debug(traceback.format_exc())
|
||||||
|
@ -108,10 +108,9 @@ class TableauSource(DashboardServiceSource):
|
|||||||
chart for chart in charts if chart.workbook.id == workbook.id
|
chart for chart in charts if chart.workbook.id == workbook.id
|
||||||
]
|
]
|
||||||
|
|
||||||
for data_model in data_models.embeddedDatasources:
|
for data_model in data_models or []:
|
||||||
for downstream_workbooks in data_model.downstreamWorkbooks or []:
|
if data_model.workbook and data_model.workbook.luid == workbook.id:
|
||||||
if downstream_workbooks.luid == workbook.id:
|
workbook.dataModels.append(data_model)
|
||||||
workbook.dataModels.append(data_model)
|
|
||||||
|
|
||||||
# collect all the tags from charts and workbooks before yielding final entities
|
# collect all the tags from charts and workbooks before yielding final entities
|
||||||
if self.source_config.includeTags:
|
if self.source_config.includeTags:
|
||||||
|
@ -100,7 +100,7 @@ class DatasourceField(BaseModel):
|
|||||||
description: Optional[str]
|
description: Optional[str]
|
||||||
|
|
||||||
|
|
||||||
class DownstreamWorkbook(BaseModel):
|
class Workbook(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
luid: str
|
luid: str
|
||||||
name: str
|
name: str
|
||||||
@ -131,12 +131,17 @@ class DataSource(BaseModel):
|
|||||||
id: str
|
id: str
|
||||||
name: str
|
name: str
|
||||||
fields: Optional[List[DatasourceField]]
|
fields: Optional[List[DatasourceField]]
|
||||||
downstreamWorkbooks: Optional[List[DownstreamWorkbook]]
|
workbook: Optional[Workbook]
|
||||||
upstreamTables: Optional[List[UpstreamTable]]
|
upstreamTables: Optional[List[UpstreamTable]]
|
||||||
|
|
||||||
|
|
||||||
class TableauDatasources(BaseModel):
|
class TableauDatasources(BaseModel):
|
||||||
embeddedDatasources: Optional[List[DataSource]]
|
nodes: Optional[List[DataSource]]
|
||||||
|
totalCount: Optional[int]
|
||||||
|
|
||||||
|
|
||||||
|
class TableauDatasourcesConnection(BaseModel):
|
||||||
|
embeddedDatasourcesConnection: Optional[TableauDatasources]
|
||||||
|
|
||||||
|
|
||||||
class TableauChart(TableauBaseModel):
|
class TableauChart(TableauBaseModel):
|
||||||
|
@ -14,46 +14,49 @@ GraphQL queries used during ingestion
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
TABLEAU_DATASOURCES_QUERY = """
|
TABLEAU_DATASOURCES_QUERY = """
|
||||||
query {
|
{{
|
||||||
embeddedDatasources {
|
embeddedDatasourcesConnection(first: {first}, offset: {offset} ) {{
|
||||||
id
|
nodes {{
|
||||||
name
|
|
||||||
fields {
|
|
||||||
id
|
id
|
||||||
name
|
name
|
||||||
upstreamColumns{
|
fields {{
|
||||||
id
|
id
|
||||||
name
|
name
|
||||||
remoteType
|
upstreamColumns{{
|
||||||
}
|
id
|
||||||
fullyQualifiedName
|
name
|
||||||
description
|
remoteType
|
||||||
}
|
}}
|
||||||
downstreamWorkbooks {
|
fullyQualifiedName
|
||||||
id
|
description
|
||||||
luid
|
}}
|
||||||
name
|
workbook {{
|
||||||
}
|
|
||||||
upstreamTables {
|
|
||||||
id
|
|
||||||
luid
|
|
||||||
name
|
|
||||||
fullName
|
|
||||||
schema
|
|
||||||
referencedByQueries {
|
|
||||||
id
|
id
|
||||||
|
luid
|
||||||
name
|
name
|
||||||
query
|
}}
|
||||||
}
|
upstreamTables {{
|
||||||
columns {
|
|
||||||
id
|
id
|
||||||
|
luid
|
||||||
name
|
name
|
||||||
}
|
fullName
|
||||||
database {
|
schema
|
||||||
id
|
referencedByQueries {{
|
||||||
name
|
id
|
||||||
}
|
name
|
||||||
}
|
query
|
||||||
}
|
}}
|
||||||
}
|
columns {{
|
||||||
|
id
|
||||||
|
name
|
||||||
|
}}
|
||||||
|
database {{
|
||||||
|
id
|
||||||
|
name
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
totalCount
|
||||||
|
}}
|
||||||
|
}}
|
||||||
"""
|
"""
|
||||||
|
@ -116,6 +116,12 @@ This is a sample config for Tableau:
|
|||||||
|
|
||||||
{% /codeInfo %}
|
{% /codeInfo %}
|
||||||
|
|
||||||
|
{% codeInfo srNumber=18 %}
|
||||||
|
|
||||||
|
**paginationLimit**: The number of data sources requested per page when querying the Tableau GraphQL endpoint for data source information. Defaults to 10; values above 50 are capped at 50.
|
||||||
|
|
||||||
|
{% /codeInfo %}
|
||||||
|
|
||||||
#### Source Configuration - Source Config
|
#### Source Configuration - Source Config
|
||||||
|
|
||||||
{% codeInfo srNumber=8 %}
|
{% codeInfo srNumber=8 %}
|
||||||
@ -186,6 +192,9 @@ source:
|
|||||||
```yaml {% srNumber=7 %}
|
```yaml {% srNumber=7 %}
|
||||||
apiVersion: api_version
|
apiVersion: api_version
|
||||||
```
|
```
|
||||||
|
```yaml {% srNumber=18 %}
|
||||||
|
paginationLimit: pagination_limit
|
||||||
|
```
|
||||||
```yaml {% srNumber=8 %}
|
```yaml {% srNumber=8 %}
|
||||||
sourceConfig:
|
sourceConfig:
|
||||||
config:
|
config:
|
||||||
|
@ -116,6 +116,12 @@ This is a sample config for Tableau:
|
|||||||
|
|
||||||
{% /codeInfo %}
|
{% /codeInfo %}
|
||||||
|
|
||||||
|
{% codeInfo srNumber=11 %}
|
||||||
|
|
||||||
|
**paginationLimit**: The number of data sources requested per page when querying the Tableau GraphQL endpoint for data source information. Defaults to 10; values above 50 are capped at 50.
|
||||||
|
|
||||||
|
{% /codeInfo %}
|
||||||
|
|
||||||
#### Source Configuration - Source Config
|
#### Source Configuration - Source Config
|
||||||
|
|
||||||
{% codeInfo srNumber=8 %}
|
{% codeInfo srNumber=8 %}
|
||||||
@ -186,6 +192,9 @@ source:
|
|||||||
```yaml {% srNumber=7 %}
|
```yaml {% srNumber=7 %}
|
||||||
apiVersion: api_version
|
apiVersion: api_version
|
||||||
```
|
```
|
||||||
|
```yaml {% srNumber=11 %}
|
||||||
|
paginationLimit: pagination_limit
|
||||||
|
```
|
||||||
```yaml {% srNumber=8 %}
|
```yaml {% srNumber=8 %}
|
||||||
sourceConfig:
|
sourceConfig:
|
||||||
config:
|
config:
|
||||||
|
@ -221,6 +221,7 @@ For more information to get a Personal Access Token please visit this [link](htt
|
|||||||
- **Site Name**: This corresponds to the `contentUrl` attribute in the Tableau REST API. The `site_name` is the portion of the URL that follows the `/site/` in the URL.
|
- **Site Name**: This corresponds to the `contentUrl` attribute in the Tableau REST API. The `site_name` is the portion of the URL that follows the `/site/` in the URL.
|
||||||
- **Site URL**: If it is empty, the default Tableau site name will be used.
|
- **Site URL**: If it is empty, the default Tableau site name will be used.
|
||||||
- **Environment**: The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
|
- **Environment**: The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
|
||||||
|
- **Pagination Limit**: The number of data sources requested per page when querying the Tableau GraphQL endpoint for data source information. Defaults to 10; values above 50 are capped at 50.
|
||||||
|
|
||||||
{% /extraContent %}
|
{% /extraContent %}
|
||||||
|
|
||||||
|
@ -61,6 +61,12 @@
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "tableau_prod"
|
"default": "tableau_prod"
|
||||||
},
|
},
|
||||||
|
"paginationLimit": {
|
||||||
|
"title": "Pagination Limit",
|
||||||
|
"description": "Pagination limit used while querying the tableau metadata API for getting data sources",
|
||||||
|
"type": "integer",
|
||||||
|
"default": 10
|
||||||
|
},
|
||||||
"verifySSL": {
|
"verifySSL": {
|
||||||
"$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL",
|
"$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL",
|
||||||
"default": "no-ssl"
|
"default": "no-ssl"
|
||||||
|
@ -85,6 +85,12 @@ $$section
|
|||||||
The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
|
The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
|
||||||
$$
|
$$
|
||||||
|
|
||||||
|
$$section
|
||||||
|
### Pagination Limit $(id="paginationLimit")
|
||||||
|
|
||||||
|
The number of data sources requested per page when querying the Tableau GraphQL endpoint for data source information. Defaults to 10; values above 50 are capped at 50.
|
||||||
|
$$
|
||||||
|
|
||||||
$$section
|
$$section
|
||||||
### Verify SSL $(id="verifySSL")
|
### Verify SSL $(id="verifySSL")
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user