mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-08-22 07:58:06 +00:00
Added pagination Tableau data sources graphql (#12187)
* Added pagination tableau graphql * changed downstream workbook
This commit is contained in:
parent
df7f5a7309
commit
acf25f4555
@ -18,6 +18,7 @@ source:
|
||||
siteName: site_name
|
||||
siteUrl: site_url
|
||||
apiVersion: api_version
|
||||
paginationLimit: 10
|
||||
sourceConfig:
|
||||
config:
|
||||
type: DashboardMetadata
|
||||
|
@ -11,6 +11,7 @@
|
||||
"""
|
||||
Wrapper module of TableauServerConnection client
|
||||
"""
|
||||
import math
|
||||
import traceback
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
@ -23,9 +24,11 @@ from metadata.ingestion.source.dashboard.tableau import (
|
||||
TABLEAU_GET_WORKBOOKS_PARAM_DICT,
|
||||
)
|
||||
from metadata.ingestion.source.dashboard.tableau.models import (
|
||||
DataSource,
|
||||
TableauChart,
|
||||
TableauDashboard,
|
||||
TableauDatasources,
|
||||
TableauDatasourcesConnection,
|
||||
TableauOwner,
|
||||
)
|
||||
from metadata.ingestion.source.dashboard.tableau.queries import (
|
||||
@ -49,7 +52,13 @@ class TableauClient:
|
||||
|
||||
_client: TableauServerConnection
|
||||
|
||||
def __init__(self, config: Dict[str, Dict[str, Any]], env: str, ssl_verify: bool):
|
||||
def __init__(
|
||||
self,
|
||||
config: Dict[str, Dict[str, Any]],
|
||||
env: str,
|
||||
ssl_verify: bool,
|
||||
pagination_limit: int,
|
||||
):
|
||||
# ssl_verify is typed as a `bool` in TableauServerConnection
|
||||
# However, it is passed as `verify=self.ssl_verify` in each `requests` call.
|
||||
# In requests (https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification)
|
||||
@ -60,6 +69,7 @@ class TableauClient:
|
||||
ssl_verify=ssl_verify,
|
||||
)
|
||||
self._client.sign_in().json()
|
||||
self.pagination_limit = pagination_limit
|
||||
|
||||
@cached_property
|
||||
def server_info(self) -> Callable:
|
||||
@ -106,15 +116,25 @@ class TableauClient:
|
||||
)
|
||||
]
|
||||
|
||||
def get_datasources(self):
|
||||
def _query_datasources(
|
||||
self, entities_per_page: int, offset: int
|
||||
) -> Optional[TableauDatasources]:
|
||||
"""
|
||||
Method to query the graphql endpoint to get data sources
|
||||
"""
|
||||
try:
|
||||
datasources_graphql_result = self._client.metadata_graphql_query(
|
||||
query=TABLEAU_DATASOURCES_QUERY
|
||||
query=TABLEAU_DATASOURCES_QUERY.format(
|
||||
first=entities_per_page, offset=offset
|
||||
)
|
||||
)
|
||||
if datasources_graphql_result:
|
||||
resp = datasources_graphql_result.json()
|
||||
if resp and resp.get("data"):
|
||||
return TableauDatasources(**resp.get("data"))
|
||||
tableau_datasource_connection = TableauDatasourcesConnection(
|
||||
**resp.get("data")
|
||||
)
|
||||
return tableau_datasource_connection.embeddedDatasourcesConnection
|
||||
except Exception:
|
||||
logger.debug(traceback.format_exc())
|
||||
logger.warning(
|
||||
@ -124,7 +144,32 @@ class TableauClient:
|
||||
"https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html"
|
||||
"#enable-the-tableau-metadata-api-for-tableau-server\n"
|
||||
)
|
||||
return TableauDatasources(embeddedDatasources=[])
|
||||
return None
|
||||
|
||||
def get_datasources(self) -> Optional[List[DataSource]]:
    """
    Paginate and get the list of all data sources

    A one-entity probe query is issued first to read ``totalCount``. The data
    sources are then requested page by page through ``_query_datasources``
    and their ``nodes`` are concatenated.

    Returns:
        The data sources gathered from every page that could be read, an
        empty list when the reported total is zero, or ``None`` when the
        total count could not be obtained or an unexpected error occurred.
    """
    try:
        # Query the graphql endpoint once to get total count of data sources
        probe = self._query_datasources(entities_per_page=1, offset=1)
        if probe is None or probe.totalCount is None:
            # Without a total there is nothing to paginate over
            logger.warning("Unable to fetch Data Sources")
            return None

        # Request at most 50 entities per page; clamp to at least 1 so a
        # limit of 0 cannot produce an unusable page size.
        entities_per_page = max(1, min(50, self.pagination_limit))

        # Paginate the results
        data_sources = []
        for offset in range(0, probe.totalCount, entities_per_page):
            page = self._query_datasources(
                entities_per_page=entities_per_page, offset=offset
            )
            # The page itself and its `nodes` are both optional; skip an
            # unreadable page instead of discarding the pages already read.
            if page and page.nodes:
                data_sources.extend(page.nodes)
        return data_sources
    except Exception:
        logger.debug(traceback.format_exc())
        logger.warning("Unable to fetch Data Sources")
    return None
|
||||
|
||||
def sign_out(self) -> None:
    """
    Sign out through the wrapped TableauServerConnection client
    """
    connection = self._client
    connection.sign_out()
|
||||
|
@ -55,6 +55,7 @@ def get_connection(connection: TableauConnection) -> TableauClient:
|
||||
config=tableau_server_config,
|
||||
env=connection.env,
|
||||
ssl_verify=get_verify_ssl(connection.sslConfig),
|
||||
pagination_limit=connection.paginationLimit,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug(traceback.format_exc())
|
||||
|
@ -108,10 +108,9 @@ class TableauSource(DashboardServiceSource):
|
||||
chart for chart in charts if chart.workbook.id == workbook.id
|
||||
]
|
||||
|
||||
for data_model in data_models.embeddedDatasources:
|
||||
for downstream_workbooks in data_model.downstreamWorkbooks or []:
|
||||
if downstream_workbooks.luid == workbook.id:
|
||||
workbook.dataModels.append(data_model)
|
||||
for data_model in data_models or []:
|
||||
if data_model.workbook and data_model.workbook.luid == workbook.id:
|
||||
workbook.dataModels.append(data_model)
|
||||
|
||||
# collect all the tags from charts and workbooks before yielding final entities
|
||||
if self.source_config.includeTags:
|
||||
|
@ -100,7 +100,7 @@ class DatasourceField(BaseModel):
|
||||
description: Optional[str]
|
||||
|
||||
|
||||
class DownstreamWorkbook(BaseModel):
|
||||
class Workbook(BaseModel):
|
||||
id: str
|
||||
luid: str
|
||||
name: str
|
||||
@ -131,12 +131,17 @@ class DataSource(BaseModel):
|
||||
id: str
|
||||
name: str
|
||||
fields: Optional[List[DatasourceField]]
|
||||
downstreamWorkbooks: Optional[List[DownstreamWorkbook]]
|
||||
workbook: Optional[Workbook]
|
||||
upstreamTables: Optional[List[UpstreamTable]]
|
||||
|
||||
|
||||
class TableauDatasources(BaseModel):
|
||||
embeddedDatasources: Optional[List[DataSource]]
|
||||
nodes: Optional[List[DataSource]]
|
||||
totalCount: Optional[int]
|
||||
|
||||
|
||||
class TableauDatasourcesConnection(BaseModel):
    """
    Model for the `data` payload of the data sources GraphQL response,
    which carries the paginated `embeddedDatasourcesConnection` entry
    """

    # Page of data sources (`nodes`) together with the overall `totalCount`;
    # optional, so it may be absent from the payload.
    embeddedDatasourcesConnection: Optional[TableauDatasources]
|
||||
|
||||
|
||||
class TableauChart(TableauBaseModel):
|
||||
|
@ -14,46 +14,49 @@ GraphQL queries used during ingestion
|
||||
"""
|
||||
|
||||
TABLEAU_DATASOURCES_QUERY = """
|
||||
query {
|
||||
embeddedDatasources {
|
||||
id
|
||||
name
|
||||
fields {
|
||||
{{
|
||||
embeddedDatasourcesConnection(first: {first}, offset: {offset} ) {{
|
||||
nodes {{
|
||||
id
|
||||
name
|
||||
upstreamColumns{
|
||||
fields {{
|
||||
id
|
||||
name
|
||||
remoteType
|
||||
}
|
||||
fullyQualifiedName
|
||||
description
|
||||
}
|
||||
downstreamWorkbooks {
|
||||
id
|
||||
luid
|
||||
name
|
||||
}
|
||||
upstreamTables {
|
||||
id
|
||||
luid
|
||||
name
|
||||
fullName
|
||||
schema
|
||||
referencedByQueries {
|
||||
upstreamColumns{{
|
||||
id
|
||||
name
|
||||
remoteType
|
||||
}}
|
||||
fullyQualifiedName
|
||||
description
|
||||
}}
|
||||
workbook {{
|
||||
id
|
||||
luid
|
||||
name
|
||||
query
|
||||
}
|
||||
columns {
|
||||
}}
|
||||
upstreamTables {{
|
||||
id
|
||||
luid
|
||||
name
|
||||
}
|
||||
database {
|
||||
id
|
||||
name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fullName
|
||||
schema
|
||||
referencedByQueries {{
|
||||
id
|
||||
name
|
||||
query
|
||||
}}
|
||||
columns {{
|
||||
id
|
||||
name
|
||||
}}
|
||||
database {{
|
||||
id
|
||||
name
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
totalCount
|
||||
}}
|
||||
}}
|
||||
"""
|
||||
|
@ -116,6 +116,12 @@ This is a sample config for Tableau:
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=18 %}
|
||||
|
||||
**paginationLimit**: The page size used when querying the Tableau GraphQL endpoint to get the data source information. Values above 50 are capped at 50.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
{% codeInfo srNumber=8 %}
|
||||
@ -186,6 +192,9 @@ source:
|
||||
```yaml {% srNumber=7 %}
|
||||
apiVersion: api_version
|
||||
```
|
||||
```yaml {% srNumber=18 %}
|
||||
paginationLimit: pagination_limit
|
||||
```
|
||||
```yaml {% srNumber=8 %}
|
||||
sourceConfig:
|
||||
config:
|
||||
|
@ -116,6 +116,12 @@ This is a sample config for Tableau:
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=11 %}
|
||||
|
||||
**paginationLimit**: The page size used when querying the Tableau GraphQL endpoint to get the data source information. Values above 50 are capped at 50.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
{% codeInfo srNumber=8 %}
|
||||
@ -186,6 +192,9 @@ source:
|
||||
```yaml {% srNumber=7 %}
|
||||
apiVersion: api_version
|
||||
```
|
||||
```yaml {% srNumber=11 %}
|
||||
paginationLimit: pagination_limit
|
||||
```
|
||||
```yaml {% srNumber=8 %}
|
||||
sourceConfig:
|
||||
config:
|
||||
|
@ -221,6 +221,7 @@ For more information to get a Personal Access Token please visit this [link](htt
|
||||
- **Site Name**: This corresponds to the `contentUrl` attribute in the Tableau REST API. The `site_name` is the portion of the URL that follows the `/site/` in the URL.
|
||||
- **Site URL**: If it is empty, the default Tableau site name will be used.
|
||||
- **Environment**: The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
|
||||
- **Pagination Limit**: The page size used when querying the Tableau GraphQL endpoint to get the data source information. Values above 50 are capped at 50.
|
||||
|
||||
{% /extraContent %}
|
||||
|
||||
|
@ -61,6 +61,12 @@
|
||||
"type": "string",
|
||||
"default": "tableau_prod"
|
||||
},
|
||||
"paginationLimit": {
|
||||
"title": "Pagination Limit",
|
||||
"description": "Pagination limit used while querying the Tableau Metadata API for getting data sources",
|
||||
"type": "integer",
|
||||
"default": 10
|
||||
},
|
||||
"verifySSL": {
|
||||
"$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/verifySSL",
|
||||
"default": "no-ssl"
|
||||
|
@ -85,6 +85,12 @@ $$section
|
||||
The config object can have multiple environments. The default environment is defined as `tableau_prod`, and you can change this if needed by specifying an `env` parameter.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Pagination Limit $(id="paginationLimit")
|
||||
|
||||
The pagination limit is the page size used when querying the Tableau GraphQL endpoint to get the data source information. Values above 50 are capped at 50.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Verify SSL $(id="verifySSL")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user