Fix #8778 and #8494: Tableau Parser issue and API docs (#8875)

* Tmp fixed parser and added doc for API

* fixed pycheckstyle

* Addressed review comments
This commit is contained in:
Onkar Ravgan 2022-11-21 12:50:25 +05:30 committed by GitHub
parent a857456cee
commit 5aa44e1cf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 44 additions and 32 deletions

View File

@ -47,33 +47,13 @@ from metadata.ingestion.models.ometa_tag_category import OMetaTagAndCategory
from metadata.ingestion.source.dashboard.dashboard_service import DashboardServiceSource
from metadata.utils import fqn
from metadata.utils.filters import filter_by_chart
from metadata.utils.graphql_queries import TABLEAU_LINEAGE_GRAPHQL_QUERY
from metadata.utils.helpers import get_standard_chart_type
from metadata.utils.logger import ingestion_logger
logger = ingestion_logger()
TABLEAU_TAG_CATEGORY = "TableauTags"
TABLEAU_LINEAGE_GRAPHQL_QUERY = """
{
workbooks {
id
luid
name
upstreamTables{
name
schema
upstreamDatabases{
name
}
referencedByQueries{
name
query
}
}
}
}
"""
class TableauSource(DashboardServiceSource):
"""
@ -94,7 +74,7 @@ class TableauSource(DashboardServiceSource):
self.workbooks = {}
self.tags = []
self.owner = {}
self.workboook_datasources = {}
self.workbook_datasources = {}
def prepare(self):
# Restructuring the api response for workbooks
@ -129,13 +109,23 @@ class TableauSource(DashboardServiceSource):
owner = get_all_user_fields(self.client)
self.owner = {user["id"]: user for user in owner}
# Fetch Datasource information for lineage
graphql_query_result = self.client.metadata_graphql_query(
query=TABLEAU_LINEAGE_GRAPHQL_QUERY
)
self.workboook_datasources = json.loads(graphql_query_result.text)["data"].get(
"workbooks"
)
if self.source_config.dbServiceNames:
try:
# Fetch Datasource information for lineage
graphql_query_result = self.client.metadata_graphql_query(
query=TABLEAU_LINEAGE_GRAPHQL_QUERY
)
self.workbook_datasources = json.loads(graphql_query_result.text)[
"data"
].get("workbooks")
except Exception:
logger.debug(traceback.format_exc())
logger.warning(
"\nSomething went wrong while connecting to Tableau Metadata APIs\n"
"Please check if the Tableau Metadata APIs are enabled for you Tableau instance\n"
"For more information on enabling the Tableau Metadata APIs follow the link below\n"
"https://help.tableau.com/current/api/metadata_api/en-us/docs/meta_api_start.html#enable-the-tableau-metadata-api-for-tableau-server\n" # pylint: disable=line-too-long
)
return super().prepare()
@ -250,7 +240,7 @@ class TableauSource(DashboardServiceSource):
data_source = next(
(
data_source
for data_source in self.workboook_datasources
for data_source in self.workbook_datasources or []
if data_source.get("luid") == dashboard_id
),
None,

View File

@ -410,8 +410,9 @@ def split_table_name(table_name: str) -> Dict[str, Optional[str]]:
:param table_name: raw table name
:return: dict with data
"""
details: List[str] = split(table_name)
# Revisit: Check the antlr grammer for issue when string has double quotes
# Issue Link: https://github.com/open-metadata/OpenMetadata/issues/8874
details: List[str] = split(table_name.replace('"', ""))
# Pad None to the left until size of list is 3
full_details: List[Optional[str]] = ([None] * (3 - len(details))) + details

View File

@ -139,3 +139,24 @@ GRAPHQL_QUERY_FOR_JOBS = """query PipelineRuns($selector: GraphSelector!){
}
}
}"""
TABLEAU_LINEAGE_GRAPHQL_QUERY = """
{
workbooks {
id
luid
name
upstreamTables{
name
schema
upstreamDatabases{
name
}
referencedByQueries{
name
query
}
}
}
}
"""