feat(mode): add mode analytics ingestion source (#3710)

This commit is contained in:
Gabe Lyons 2021-12-09 16:10:08 -08:00 committed by GitHub
parent bd4ecbc7b9
commit 8394fc62b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1995 additions and 35 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

@ -32,41 +32,42 @@ We use a plugin architecture so that you can install only the dependencies you a
Sources:
| Plugin Name | Install Command | Provides |
| ----------------------------------------------- | ---------------------------------------------------------- | ----------------------------------- |
| [file](./source_docs/file.md) | _included by default_ | File source and sink |
| [athena](./source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source |
| [bigquery](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source |
| [bigquery-usage](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source |
| [datahub-business-glossary](./source_docs/business_glossary.md) | _no additional dependencies_ | Business Glossary File source |
| [dbt](./source_docs/dbt.md) | _no additional dependencies_ | dbt source |
| [druid](./source_docs/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source |
| [feast](./source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source |
| [glue](./source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source |
| [hive](./source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source |
| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source |
| [kafka-connect](./source_docs/kafka-connect.md) | `pip install 'acryl-datahub[kafka-connect]'` | Kafka connect source |
| [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
| [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source |
| [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ |
| [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
| [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
| [mysql](./source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source |
| [mariadb](./source_docs/mariadb.md) | `pip install 'acryl-datahub[mariadb]'` | MariaDB source |
| [openapi](./source_docs/openapi.md) | `pip install 'acryl-datahub[openapi]'` | OpenApi Source |
| [oracle](./source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source |
| [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source |
| [redash](./source_docs/redash.md) | `pip install 'acryl-datahub[redash]'` | Redash source |
| [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source |
| [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source |
| [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
| [snowflake-usage](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source |
| [sql-profiles](./source_docs/sql_profiles.md) | `pip install 'acryl-datahub[sql-profiles]'` | Data profiles for SQL-based systems |
| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source |
| [superset](./source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source |
| [trino](./source_docs/trino.md) | `pip install 'acryl-datahub[trino]` | Trino source |
| [starburst-trino-usage](./source_docs/trino.md) | `pip install 'acryl-datahub[starburst-trino-usage]'` | Starburst Trino usage statistics source |
| [nifi](./source_docs/nifi.md) | `pip install 'acryl-datahub[nifi]' | Nifi source |
| Plugin Name | Install Command | Provides |
|-----------------------------------------------------------------|------------------------------------------------------------| ----------------------------------- |
| [file](./source_docs/file.md) | _included by default_ | File source and sink |
| [athena](./source_docs/athena.md) | `pip install 'acryl-datahub[athena]'` | AWS Athena source |
| [bigquery](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery]'` | BigQuery source |
| [bigquery-usage](./source_docs/bigquery.md) | `pip install 'acryl-datahub[bigquery-usage]'` | BigQuery usage statistics source |
| [datahub-business-glossary](./source_docs/business_glossary.md) | _no additional dependencies_ | Business Glossary File source |
| [dbt](./source_docs/dbt.md) | _no additional dependencies_ | dbt source |
| [druid](./source_docs/druid.md) | `pip install 'acryl-datahub[druid]'` | Druid Source |
| [feast](./source_docs/feast.md) | `pip install 'acryl-datahub[feast]'` | Feast source |
| [glue](./source_docs/glue.md) | `pip install 'acryl-datahub[glue]'` | AWS Glue source |
| [hive](./source_docs/hive.md) | `pip install 'acryl-datahub[hive]'` | Hive source |
| [kafka](./source_docs/kafka.md) | `pip install 'acryl-datahub[kafka]'` | Kafka source |
| [kafka-connect](./source_docs/kafka-connect.md) | `pip install 'acryl-datahub[kafka-connect]'` | Kafka connect source |
| [ldap](./source_docs/ldap.md) | `pip install 'acryl-datahub[ldap]'` ([extra requirements]) | LDAP source |
| [looker](./source_docs/looker.md) | `pip install 'acryl-datahub[looker]'` | Looker source |
| [lookml](./source_docs/lookml.md) | `pip install 'acryl-datahub[lookml]'` | LookML source, requires Python 3.7+ |
| [mode](./source_docs/mode.md) | `pip install 'acryl-datahub[mode]'` | Mode Analytics source |
| [mongodb](./source_docs/mongodb.md) | `pip install 'acryl-datahub[mongodb]'` | MongoDB source |
| [mssql](./source_docs/mssql.md) | `pip install 'acryl-datahub[mssql]'` | SQL Server source |
| [mysql](./source_docs/mysql.md) | `pip install 'acryl-datahub[mysql]'` | MySQL source |
| [mariadb](./source_docs/mariadb.md) | `pip install 'acryl-datahub[mariadb]'` | MariaDB source |
| [openapi](./source_docs/openapi.md) | `pip install 'acryl-datahub[openapi]'` | OpenApi Source |
| [oracle](./source_docs/oracle.md) | `pip install 'acryl-datahub[oracle]'` | Oracle source |
| [postgres](./source_docs/postgres.md) | `pip install 'acryl-datahub[postgres]'` | Postgres source |
| [redash](./source_docs/redash.md) | `pip install 'acryl-datahub[redash]'` | Redash source |
| [redshift](./source_docs/redshift.md) | `pip install 'acryl-datahub[redshift]'` | Redshift source |
| [sagemaker](./source_docs/sagemaker.md) | `pip install 'acryl-datahub[sagemaker]'` | AWS SageMaker source |
| [snowflake](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake]'` | Snowflake source |
| [snowflake-usage](./source_docs/snowflake.md) | `pip install 'acryl-datahub[snowflake-usage]'` | Snowflake usage statistics source |
| [sql-profiles](./source_docs/sql_profiles.md) | `pip install 'acryl-datahub[sql-profiles]'` | Data profiles for SQL-based systems |
| [sqlalchemy](./source_docs/sqlalchemy.md) | `pip install 'acryl-datahub[sqlalchemy]'` | Generic SQLAlchemy source |
| [superset](./source_docs/superset.md) | `pip install 'acryl-datahub[superset]'` | Superset source |
| [trino](./source_docs/trino.md) | `pip install 'acryl-datahub[trino]'` | Trino source |
| [starburst-trino-usage](./source_docs/trino.md) | `pip install 'acryl-datahub[starburst-trino-usage]'` | Starburst Trino usage statistics source |
| [nifi](./source_docs/nifi.md) | `pip install 'acryl-datahub[nifi]'` | Nifi source |
Sinks

View File

@ -197,6 +197,26 @@
},
"proposedDelta": null
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DataPlatformSnapshot": {
"urn": "urn:li:dataPlatform:mode",
"aspects": [
{
"com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo": {
"datasetNameDelimiter": ".",
"name": "mode",
"displayName": "Mode",
"type": "KEY_VALUE_STORE",
"logoUrl": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/modelogo.png"
}
}
]
}
},
"proposedDelta": null
},
{
"auditHeader": null,
"proposedSnapshot": {

View File

@ -0,0 +1,16 @@
# see https://datahubproject.io/docs/metadata-ingestion/source_docs/mode for complete documentation
source:
type: "mode"
config:
token: 9fa6a90fcd33
password: a03bcbc011d6f77c585f5682
connect_uri: https://app.mode.com/
workspace: "petabloc"
default_schema: "public"
owner_username_instead_of_email: False
# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
type: "datahub-rest"
config:
server: "http://localhost:8080"

View File

@ -112,6 +112,7 @@ plugins: Dict[str, Set[str]] = {
"ldap": {"python-ldap>=2.4"},
"looker": looker_common,
"lookml": looker_common | {"lkml>=1.1.0", "sql-metadata==2.2.2"},
"mode": {"requests", "sqllineage"},
"mongodb": {"pymongo>=3.11"},
"mssql": sql_common | {"sqlalchemy-pytds>=0.3"},
"mssql-odbc": sql_common | {"pyodbc"},
@ -282,6 +283,7 @@ entry_points = {
"looker = datahub.ingestion.source.looker:LookerDashboardSource",
"lookml = datahub.ingestion.source.lookml:LookMLSource",
"datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource",
"mode = datahub.ingestion.source.mode:ModeSource",
"mongodb = datahub.ingestion.source.mongodb:MongoDBSource",
"mssql = datahub.ingestion.source.sql.mssql:SQLServerSource",
"mysql = datahub.ingestion.source.sql.mysql:MySQLSource",

View File

@ -0,0 +1,113 @@
# Mode
For context on getting started with ingestion, check out our [metadata ingestion guide](../README.md).
## Setup
To install this plugin, run `pip install 'acryl-datahub[mode]'`.
See documentation for Mode's API at https://mode.com/developer/api-reference/introduction/
## Capabilities
This plugin extracts Charts, Reports, and associated metadata from a given Mode workspace. It is in beta and has only been tested
against a PostgreSQL database.
### Report
[/api/{account}/reports/{report}](https://mode.com/developer/api-reference/analytics/reports/) endpoint is used to
retrieve the following report information.
- Title and description
- Last edited by
- Owner
- Link to the Report in Mode for exploration
- Associated charts within the report
### Chart
[/api/{workspace}/reports/{report}/queries/{query}/charts](https://mode.com/developer/api-reference/analytics/charts/#getChart) endpoint is used to
retrieve the following information.
- Title and description
- Last edited by
- Owner
- Link to the chart in Mode
- Datasource and lineage information from Report queries.
The following properties for a chart are ingested in DataHub.
#### Chart Information
| Name | Description |
|-----------|----------------------------------------|
| `Filters` | Filters applied to the chart |
| `Metrics` | Fields or columns used for aggregation |
| `X` | Fields used in X-axis |
| `X2` | Fields used in second X-axis |
| `Y` | Fields used in Y-axis |
| `Y2` | Fields used in second Y-axis |
#### Table Information
| Name | Description |
|-----------|------------------------------|
| `Columns` | Column names in a table |
| `Filters` | Filters applied to the table |
#### Pivot Table Information
| Name | Description |
|-----------|----------------------------------------|
| `Columns` | Column names in a table |
| `Filters` | Filters applied to the table |
| `Metrics` | Fields or columns used for aggregation |
| `Rows` | Row names in a table |
## Quickstart recipe
Check out the following recipe to get started with ingestion! See [below](#config-details) for full configuration options.
For general pointers on writing and running a recipe, see our [main recipe guide](../README.md#recipes).
```yml
source:
type: mode
config:
# Coordinates
connect_uri: https://app.mode.com
# Credentials
token: token
password: pass
# Options
workspace: "datahub"
default_schema: "public"
sink:
# sink configs
```
## Config details
| Field | Required | Default | Description |
|-----------------------------------|----------|--------------------------|----------------------------------------------------------------------|
| `connect_uri` | | `"https://app.mode.com"` | Mode host URL. |
| `token` | ✅ | | Mode user token. |
| `password` | ✅ | | Mode password for authentication. |
| `workspace` | ✅ | | Mode workspace name, as it appears in report URLs. |
| `default_schema` | | `public` | Default schema to use when schema is not provided in an SQL query |
| `owner_username_instead_of_email` | | `True` | Use the Mode username instead of the email address for owner URNs. |
| `env` | | `"PROD"` | Environment to use in namespace when constructing URNs. |
See Mode's [Authentication documentation](https://mode.com/developer/api-reference/authentication/) on how to generate `token` and `password`.
## Compatibility
Coming soon!
## Questions
If you've got any questions on configuring this source, feel free to ping us on
[our Slack](https://slack.datahubproject.io/)!

View File

@ -0,0 +1,583 @@
from functools import lru_cache
from typing import Dict, Iterable, Optional, Tuple, Union
import dateutil.parser as dp
import requests
from pydantic import validator
from requests.models import HTTPBasicAuth, HTTPError
from sqllineage.runner import LineageRunner
import datahub.emitter.mce_builder as builder
from datahub.configuration.common import ConfigModel
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.source import Source, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.com.linkedin.pegasus2avro.common import (
AuditStamp,
ChangeAuditStamps,
)
from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import (
ChartSnapshot,
DashboardSnapshot,
)
from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
from datahub.metadata.schema_classes import (
BrowsePathsClass,
ChartInfoClass,
ChartQueryClass,
ChartQueryTypeClass,
ChartTypeClass,
DashboardInfoClass,
OwnerClass,
OwnershipClass,
OwnershipTypeClass,
)
from datahub.utilities import config_clean
class ModeConfig(ConfigModel):
    """Settings for the Mode ingestion source.

    See https://mode.com/developer/api-reference/authentication/
    for how the token/password credentials are obtained.
    """

    # See https://mode.com/developer/api-reference/authentication/
    # for authentication
    # Base URL of the Mode instance; passed through
    # config_clean.remove_trailing_slashes by the validator below.
    connect_uri: str = "https://app.mode.com"
    # Credentials handed to HTTP basic auth when the source builds its session.
    token: Optional[str] = None
    password: Optional[str] = None
    # Workspace name interpolated into API and report URLs.
    workspace: Optional[str] = None
    # Schema substituted when a query references a table without a schema.
    default_schema: str = "public"
    # When truthy, creators are identified by "username"; otherwise by "email".
    owner_username_instead_of_email: Optional[bool] = True
    # Environment passed to builder.make_dataset_urn for upstream datasets.
    env: str = builder.DEFAULT_ENV

    @validator("connect_uri")
    def remove_trailing_slash(cls, v):
        # Normalise the URI so later f-string URL concatenation does not
        # produce doubled slashes.
        return config_clean.remove_trailing_slashes(v)
class ModeSource(Source):
config: ModeConfig
report: SourceReport
platform = "mode"
def __hash__(self) -> int:
    # Identity-based hash: several methods of this class are wrapped in
    # lru_cache, which uses `self` as part of the cache key and therefore
    # needs the instance to be hashable.
    return id(self)
def __init__(self, ctx: PipelineContext, config: ModeConfig):
    """Create an authenticated Mode session and preload the workspace spaces.

    Args:
        ctx: Pipeline context forwarded to the base Source.
        config: Parsed Mode source configuration.

    A failed connectivity probe is recorded on the report rather than raised;
    initialisation then continues and the spaces lookup is still attempted.
    """
    super().__init__(ctx)
    self.config = config
    self.report = SourceReport()

    # One shared session carries the basic-auth credentials and HAL headers.
    session = requests.session()
    session.auth = HTTPBasicAuth(self.config.token, self.config.password)
    session.headers.update(
        {
            "Content-Type": "application/json",
            "Accept": "application/hal+json",
        }
    )
    self.session = session

    # Test the connection
    try:
        probe = self.session.get(f"{self.config.connect_uri}/api/account")
        probe.raise_for_status()
    except HTTPError as http_error:
        failure_reason = (
            f"Unable to retrieve user "
            f"{self.config.token} information, "
            f"{str(http_error)}"
        )
        self.report.report_failure(key="mode-session", reason=failure_reason)

    self.workspace_uri = f"{self.config.connect_uri}/api/{self.config.workspace}"
    self.space_tokens = self._get_space_name_and_tokens()
def construct_dashboard(
    self, space_name: str, report_info: dict
) -> DashboardSnapshot:
    """Build the dashboard snapshot for a single Mode report.

    Args:
        space_name: Name of the space containing the report; used in the
            browse path.
        report_info: Report payload (a dict) as returned by the Mode API.

    Returns:
        A DashboardSnapshot carrying DashboardInfo and BrowsePaths aspects,
        plus an Ownership aspect when one could be built.
    """
    from datetime import datetime, timezone

    def to_epoch_millis(raw_timestamp) -> int:
        # A missing or empty timestamp means "now". dateutil cannot parse the
        # literal string "now", so the fallback is computed explicitly.
        moment = (
            dp.parse(f"{raw_timestamp}")
            if raw_timestamp
            else datetime.now(tz=timezone.utc)
        )
        return int(moment.timestamp() * 1000)

    report_token = report_info.get("token", "")
    dashboard_urn = builder.make_dashboard_urn(
        self.platform, report_info.get("id", "")
    )
    dashboard_snapshot = DashboardSnapshot(
        urn=dashboard_urn,
        aspects=[],
    )

    # The creator serves as both the audit actor and the owner; look it up once.
    creator = self._get_creator(
        report_info.get("_links", {}).get("creator", {}).get("href", "")
    )
    modified_actor = builder.make_user_urn(creator)

    modified_ts = to_epoch_millis(report_info.get("last_saved_at"))
    created_ts = to_epoch_millis(report_info.get("created_at"))

    title = report_info.get("name", "") or ""
    description = report_info.get("description", "") or ""

    last_modified = ChangeAuditStamps(
        created=AuditStamp(time=created_ts, actor=modified_actor),
        lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
    )

    dashboard_info_class = DashboardInfoClass(
        description=description,
        title=title,
        charts=self._get_chart_urns(report_token),
        lastModified=last_modified,
        dashboardUrl=f"{self.config.connect_uri}/"
        f"{self.config.workspace}/"
        f"reports/{report_token}",
        customProperties={},
    )
    dashboard_snapshot.aspects.append(dashboard_info_class)

    # browse path
    browse_path = BrowsePathsClass(
        paths=[
            f"/mode/{self.config.workspace}/"
            f"{space_name}/"
            f"{report_info.get('name')}"
        ]
    )
    dashboard_snapshot.aspects.append(browse_path)

    # Ownership
    ownership = self._get_ownership(creator)
    if ownership is not None:
        dashboard_snapshot.aspects.append(ownership)

    return dashboard_snapshot
@lru_cache(maxsize=None)
def _get_ownership(self, user: str) -> Optional[OwnershipClass]:
    """Return an Ownership aspect naming `user` as DATAOWNER.

    Results are memoised per (instance, user). Returns None only if no user
    URN could be produced.
    """
    owner_urn = builder.make_user_urn(user)
    if owner_urn is None:
        return None

    data_owner = OwnerClass(
        owner=owner_urn,
        type=OwnershipTypeClass.DATAOWNER,
    )
    return OwnershipClass(owners=[data_owner])
@lru_cache(maxsize=None)
def _get_creator(self, href: str) -> str:
    """Resolve a creator link to a username or email address.

    `href` is appended to the configured connect URI and fetched with the
    shared session. Which field is returned depends on the
    `owner_username_instead_of_email` setting; "unknown" is returned when
    that field is absent from the response. Memoised per (instance, href).
    """
    profile = self.session.get(f"{self.config.connect_uri}{href}").json()
    identity_field = (
        "username" if self.config.owner_username_instead_of_email else "email"
    )
    return profile.get(identity_field, "unknown")
def _get_chart_urns(self, report_token: str) -> list:
    """Return the chart URNs for every chart of every query in a report."""
    # Walk report -> queries -> charts and turn each chart token into a URN.
    return [
        builder.make_chart_urn(self.platform, chart.get("token", ""))
        for query in self._get_queries(report_token)
        for chart in self._get_charts(report_token, query.get("token", ""))
    ]
def _get_space_name_and_tokens(self) -> dict:
    """Fetch the workspace's spaces and return a {space token: space name} map.

    An HTTP error is recorded as a report failure and whatever was collected
    so far (normally an empty dict) is returned.
    """
    names_by_token = {}
    try:
        response = self.session.get(f"{self.workspace_uri}/spaces")
        response.raise_for_status()
        embedded = response.json().get("_embedded", {})
        for space in embedded.get("spaces", {}):
            names_by_token[space.get("token", "")] = space.get("name", "")
    except HTTPError as http_error:
        failure_reason = (
            f"Unable to retrieve spaces/collections for {self.workspace_uri}, "
            f"Reason: {str(http_error)}"
        )
        self.report.report_failure(key="mode-spaces", reason=failure_reason)

    return names_by_token
def _get_chart_type(self, token: str, display_type: str) -> Optional[str]:
    """Translate a Mode chart display type into a DataHub chart type.

    Args:
        token: Chart token, used only to key report warnings.
        display_type: Mode's chart type name (e.g. "bar", "pivotTable").

    Returns:
        The mapped ChartTypeClass value, or None when the type is missing,
        unsupported, or explicitly mapped to None. A warning is reported for
        the missing and unsupported cases.
    """
    type_mapping = {
        "table": ChartTypeClass.TABLE,
        "bar": ChartTypeClass.BAR,
        "line": ChartTypeClass.LINE,
        "stackedBar100": ChartTypeClass.BAR,
        "stackedBar": ChartTypeClass.BAR,
        "hStackedBar": ChartTypeClass.BAR,
        "hStackedBar100": ChartTypeClass.BAR,
        "hBar": ChartTypeClass.BAR,
        "area": ChartTypeClass.AREA,
        "totalArea": ChartTypeClass.AREA,
        "pie": ChartTypeClass.PIE,
        "donut": ChartTypeClass.PIE,
        "scatter": ChartTypeClass.SCATTER,
        "bigValue": ChartTypeClass.TEXT,
        "pivotTable": ChartTypeClass.TABLE,
        "linePlusBar": None,
    }
    warning_key = f"mode-chart-{token}"

    if not display_type:
        self.report.report_warning(
            key=warning_key,
            reason=f"Chart type {display_type} is missing. " f"Setting to None",
        )
        return None

    # Known types (including those deliberately mapped to None) return
    # without a warning.
    if display_type in type_mapping:
        return type_mapping[display_type]

    self.report.report_warning(
        key=warning_key,
        reason=f"Chart type {display_type} not supported. " f"Setting to None",
    )
    return None
def construct_chart_custom_properties(
    self, chart_detail: dict, chart_type: str
) -> Dict:
    """Summarise a chart's view definition as string custom properties.

    Args:
        chart_detail: The chart's view payload.
        chart_type: Mode chart type; "table" and "pivotTable" get dedicated
            layouts, anything else is treated as an axis-based chart.

    Returns:
        A dict of property name to string value.
    """
    encoding = chart_detail.get("encoding", {})

    def first_formula(key: str) -> str:
        # Only the first entry of each encoding channel is surfaced.
        entries = encoding.get(key, [])
        return entries[0].get("formula", "") if len(entries) else ""

    def comma_joined(items) -> str:
        return ", ".join(items) if len(items) else ""

    if chart_type == "table":
        # Field names and the filter formula have their first and last
        # characters sliced off.
        column_names = ",".join(
            [name[1:-1] for name in list(chart_detail.get("fieldFormats", {}).keys())]
        )
        filter_formula = first_formula("filter")
        return {
            "Columns": column_names,
            "Filters": filter_formula[1:-1] if len(filter_formula) else "",
        }

    if chart_type == "pivotTable":
        pivot = chart_detail.get("pivotTable", {})
        pivot_columns = pivot.get("columns", [])
        pivot_rows = pivot.get("rows", [])
        pivot_values = pivot.get("values", [])
        pivot_filters = pivot.get("filters", [])
        properties = {
            "Columns": comma_joined(pivot_columns),
            "Rows": comma_joined(pivot_rows),
            "Metrics": comma_joined(pivot_values),
            "Filters": comma_joined(pivot_filters),
        }
        # list filters in their own row
        for filter_name in pivot_filters:
            properties[f"Filter: {filter_name}"] = ", ".join(
                pivot.get("filterValues", {}).get(filter_name, "")
            )
        return properties

    # Chart
    return {
        "X": first_formula("x"),
        "Y": first_formula("y"),
        "X2": first_formula("x2"),
        "Y2": first_formula("y2"),
        "Metrics": first_formula("value"),
        "Filters": first_formula("filter"),
    }
def _get_datahub_friendly_platform(self, adapter, platform):
# Map adaptor names to what datahub expects in
# https://github.com/linkedin/datahub/blob/master/metadata-service/war/src/main/resources/boot/data_platforms.json
platform_mapping = {
"jdbc:athena": "athena",
"jdbc:bigquery": "bigquery",
"jdbc:druid": "druid",
"jdbc:hive": "hive",
"jdbc:mysql": "mysql",
"jdbc:oracle": "oracle",
"jdbc:postgresql": "postgres",
"jdbc:presto": "presto",
"jdbc:redshift": "redshift",
"jdbc:snowflake": "snowflake",
"jdbc:spark": "spark",
"jdbc:sqlserver": "mssql",
"jdbc:teradata": "teradata",
}
if adapter in platform_mapping:
return platform_mapping[adapter]
else:
self.report.report_warning(
key=f"mode-platform-{adapter}",
reason=f"Platform was not found in DataHub. "
f"Using {platform} name as is",
)
return platform
@lru_cache(maxsize=None)
def _get_platform_and_dbname(
self, data_source_id: int
) -> Union[Tuple[str, str], Tuple[None, None]]:
ds_response = self.session.get(f"{self.workspace_uri}/data_sources")
ds_json = ds_response.json()
data_sources = ds_json.get("_embedded", {}).get("data_sources", {})
if not data_sources:
self.report.report_failure(
key=f"mode-datasource-{data_source_id}",
reason=f"No data sources found for datasource id: " f"{data_source_id}",
)
return None, None
for data_source in data_sources:
if data_source.get("id", -1) == data_source_id:
platform = self._get_datahub_friendly_platform(
data_source.get("adapter", ""), data_source.get("name", "")
)
database = data_source.get("database", "")
return platform, database
else:
self.report.report_failure(
key=f"mode-datasource-{data_source_id}",
reason=f"Cannot create datasource urn for datasource id: "
f"{data_source_id}",
)
return None, None
@lru_cache(maxsize=None)
def _get_source_from_query(self, raw_query: str) -> set:
parser = LineageRunner(raw_query)
source_paths = set()
for table in parser.source_tables:
source_schema, source_table = str(table).split(".")
if source_schema == "<default>":
source_schema = str(self.config.default_schema)
source_paths.add(f"{source_schema}.{source_table}")
return source_paths
def _get_datasource_urn(self, platform, database, source_tables):
dataset_urn = None
if platform or database is not None:
dataset_urn = [
builder.make_dataset_urn(
platform, f"{database}.{s_table}", self.config.env
)
for s_table in source_tables
]
return dataset_urn
def construct_chart_from_api_data(
    self, chart_data: dict, query: dict, path: str
) -> ChartSnapshot:
    """Build the chart snapshot for a single Mode chart.

    Args:
        chart_data: Chart payload (a dict) as returned by the Mode API.
        query: Payload of the query the chart belongs to; supplies the raw
            SQL and the data source id used for lineage.
        path: Browse path to register for the chart.

    Returns:
        A ChartSnapshot carrying ChartInfo, BrowsePaths and ChartQuery
        aspects, plus an Ownership aspect when one could be built.
    """
    from datetime import datetime, timezone

    def to_epoch_millis(raw_timestamp) -> int:
        # A missing or empty timestamp means "now". dateutil cannot parse the
        # literal string "now", so the fallback is computed explicitly.
        moment = (
            dp.parse(raw_timestamp)
            if raw_timestamp
            else datetime.now(tz=timezone.utc)
        )
        return int(moment.timestamp() * 1000)

    chart_urn = builder.make_chart_urn(self.platform, chart_data.get("token", ""))
    chart_snapshot = ChartSnapshot(
        urn=chart_urn,
        aspects=[],
    )

    # The creator serves as both the audit actor and the owner; look it up once.
    creator = self._get_creator(
        chart_data.get("_links", {}).get("creator", {}).get("href", "")
    )
    modified_actor = builder.make_user_urn(creator)

    created_ts = to_epoch_millis(chart_data.get("created_at"))
    modified_ts = to_epoch_millis(chart_data.get("updated_at"))

    last_modified = ChangeAuditStamps(
        created=AuditStamp(time=created_ts, actor=modified_actor),
        lastModified=AuditStamp(time=modified_ts, actor=modified_actor),
    )

    # Prefer "view"; fall back to "view_vegas" when it is empty, missing or
    # null, and to an empty dict when neither is usable.
    chart_detail = chart_data.get("view") or chart_data.get("view_vegas") or {}

    mode_chart_type = chart_detail.get("chartType", "") or chart_detail.get(
        "selectedChart", ""
    )
    chart_type = self._get_chart_type(chart_data.get("token", ""), mode_chart_type)
    description = (
        chart_detail.get("description")
        or chart_detail.get("chartDescription")
        or ""
    )
    title = chart_detail.get("title") or chart_detail.get("chartTitle") or ""

    # create datasource urn
    platform, db_name = self._get_platform_and_dbname(query.get("data_source_id"))
    source_tables = self._get_source_from_query(query.get("raw_query"))
    datasource_urn = self._get_datasource_urn(platform, db_name, source_tables)
    custom_properties = self.construct_chart_custom_properties(
        chart_detail, mode_chart_type
    )

    # Chart Info
    chart_info = ChartInfoClass(
        type=chart_type,
        description=description,
        title=title,
        lastModified=last_modified,
        chartUrl=f"{self.config.connect_uri}"
        f"{chart_data.get('_links', {}).get('report_viz_web', {}).get('href', '')}",
        inputs=datasource_urn,
        customProperties=custom_properties,
    )
    chart_snapshot.aspects.append(chart_info)

    # Browse Path
    browse_path = BrowsePathsClass(paths=[path])
    chart_snapshot.aspects.append(browse_path)

    # Query
    chart_query = ChartQueryClass(
        rawQuery=query.get("raw_query", ""),
        type=ChartQueryTypeClass.SQL,
    )
    chart_snapshot.aspects.append(chart_query)

    # Ownership
    ownership = self._get_ownership(creator)
    if ownership is not None:
        chart_snapshot.aspects.append(ownership)

    return chart_snapshot
@lru_cache(maxsize=None)
def _get_reports(self, space_token: str) -> list:
    """Fetch the reports contained in a space.

    An HTTP error is recorded as a report failure and an empty list is
    returned. Memoised per (instance, space_token).
    """
    try:
        response = self.session.get(
            f"{self.workspace_uri}/spaces/{space_token}/reports"
        )
        response.raise_for_status()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"mode-report-{space_token}",
            reason=f"Unable to retrieve reports for space token: {space_token}, "
            f"Reason: {str(http_error)}",
        )
        return []

    return response.json().get("_embedded", {}).get("reports", {})
@lru_cache(maxsize=None)
def _get_queries(self, report_token: str) -> list:
    """Fetch the queries that belong to a report.

    An HTTP error is recorded as a report failure and an empty list is
    returned. Memoised per (instance, report_token).
    """
    try:
        response = self.session.get(
            f"{self.workspace_uri}/reports/{report_token}/queries"
        )
        response.raise_for_status()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"mode-query-{report_token}",
            reason=f"Unable to retrieve queries for report token: {report_token}, "
            f"Reason: {str(http_error)}",
        )
        return []

    return response.json().get("_embedded", {}).get("queries", {})
@lru_cache(maxsize=None)
def _get_charts(self, report_token: str, query_token: str) -> list:
    """Fetch the charts attached to one query of a report.

    An HTTP error is recorded as a report failure and an empty list is
    returned. Memoised per (instance, report_token, query_token).
    """
    charts_url = (
        f"{self.workspace_uri}/reports/{report_token}"
        f"/queries/{query_token}/charts"
    )
    try:
        response = self.session.get(charts_url)
        response.raise_for_status()
    except HTTPError as http_error:
        self.report.report_failure(
            key=f"mode-chart-{report_token}-{query_token}",
            reason=f"Unable to retrieve charts: "
            f"Report token: {report_token} "
            f"Query token: {query_token}, "
            f"Reason: {str(http_error)}",
        )
        return []

    return response.json().get("_embedded", {}).get("charts", {})
def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
    """Yield one dashboard work unit per report in every known space."""
    for space_token, space_name in self.space_tokens.items():
        for report in self._get_reports(space_token):
            snapshot = self.construct_dashboard(space_name, report)
            work_unit = MetadataWorkUnit(
                id=snapshot.urn,
                mce=MetadataChangeEvent(proposedSnapshot=snapshot),
            )
            # Register the unit on the report before handing it downstream.
            self.report.report_workunit(work_unit)
            yield work_unit
def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]:
    """Yield one chart work unit per chart in the workspace.

    Walks space -> report -> query -> chart, building each chart's browse
    path from the names along the way.
    """
    # Space/collection -> report -> query -> Chart
    for space_token, space_name in self.space_tokens.items():
        reports = self._get_reports(space_token)
        for report in reports:
            report_token = report.get("token", "")
            queries = self._get_queries(report_token)
            for query in queries:
                charts = self._get_charts(report_token, query.get("token", ""))
                # build charts
                for chart in charts:
                    # Either view may be empty, missing or null; fall back to
                    # an empty dict so the title lookup cannot hit None.
                    view = chart.get("view") or chart.get("view_vegas") or {}
                    chart_name = view.get("title") or view.get("chartTitle") or ""
                    path = (
                        f"/mode/{self.config.workspace}/{space_name}"
                        f"/{report.get('name')}/{query.get('name')}/"
                        f"{chart_name}"
                    )
                    chart_snapshot = self.construct_chart_from_api_data(
                        chart, query, path
                    )
                    mce = MetadataChangeEvent(proposedSnapshot=chart_snapshot)
                    wu = MetadataWorkUnit(id=chart_snapshot.urn, mce=mce)
                    self.report.report_workunit(wu)

                    yield wu
@classmethod
def create(cls, config_dict: dict, ctx: PipelineContext) -> Source:
    """Alternate constructor: parse a raw config dict and build the source."""
    return cls(ctx, ModeConfig.parse_obj(config_dict))
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    """Yield all dashboard work units, followed by all chart work units."""
    # Emitters are invoked lazily, in order: charts only start once the
    # dashboard generator is exhausted.
    for emit in (self.emit_dashboard_mces, self.emit_chart_mces):
        yield from emit()
def get_report(self) -> SourceReport:
    """Return the SourceReport on which this source records work units, warnings and failures."""
    return self.report

View File

@ -0,0 +1,150 @@
[
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
"urn": "urn:li:dashboard:(mode,2910167)",
"aspects": [
{
"com.linkedin.pegasus2avro.dashboard.DashboardInfo": {
"customProperties": {},
"externalUrl": null,
"title": "Actor Report",
"description": "Actor Report Description",
"charts": [
"urn:li:chart:(mode,bce2fc7d4910)"
],
"lastModified": {
"created": {
"time": 1638301936996,
"actor": "urn:li:corpuser:modeuser",
"impersonator": null
},
"lastModified": {
"time": 1638935478670,
"actor": "urn:li:corpuser:modeuser",
"impersonator": null
},
"deleted": null
},
"dashboardUrl": "https://app.mode.com/acryl/reports/72f2ef8fb3a8",
"access": null,
"lastRefreshed": null
}
},
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/mode/acryl/Petabloc/Actor Report"
]
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:modeuser",
"type": "DATAOWNER",
"source": null
}
],
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown",
"impersonator": null
}
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1638860400000,
"runId": "mode-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
"urn": "urn:li:chart:(mode,bce2fc7d4910)",
"aspects": [
{
"com.linkedin.pegasus2avro.chart.ChartInfo": {
"customProperties": {
"Columns": "first_name,last_name,payment_date",
"Filters": "last_name"
},
"externalUrl": null,
"title": "Customer Payment Information",
"description": "",
"lastModified": {
"created": {
"time": 1638827576804,
"actor": "urn:li:corpuser:modeuser",
"impersonator": null
},
"lastModified": {
"time": 1638904049708,
"actor": "urn:li:corpuser:modeuser",
"impersonator": null
},
"deleted": null
},
"chartUrl": "https://app.mode.com/acryl/reports/72f2ef8fb3a8/viz/bce2fc7d4910",
"inputs": [
{
"string": "urn:li:dataset:(urn:li:dataPlatform:postgres,dvdrental.public.payment,PROD)"
}
],
"type": "TABLE",
"access": null,
"lastRefreshed": null
}
},
{
"com.linkedin.pegasus2avro.common.BrowsePaths": {
"paths": [
"/mode/acryl/Petabloc/Actor Report/Customer Payment/Customer Payment Information"
]
}
},
{
"com.linkedin.pegasus2avro.chart.ChartQuery": {
"rawQuery": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date\\nFROM\\n\\tcustomer\\nINNER JOIN public.payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date;\\n",
"type": "SQL"
}
},
{
"com.linkedin.pegasus2avro.common.Ownership": {
"owners": [
{
"owner": "urn:li:corpuser:modeuser",
"type": "DATAOWNER",
"source": null
}
],
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown",
"impersonator": null
}
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1638860400000,
"runId": "mode-test",
"registryName": null,
"registryVersion": null,
"properties": null
}
}
]

View File

@ -0,0 +1,315 @@
{
"_links": {
"self": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts"
}
},
"_embedded": {
"charts": [{
"view": {},
"view_version": 2,
"view_vegas": {
"format": {
"xAxis": {
"rotate": null,
"dateFormat": "auto",
"numberRange": {
"min": null,
"max": null
},
"numberFormat": {
"format": null,
"precision": null
},
"title": null,
"titleEnabled": true
},
"yAxis": {
"numberRange": {
"min": null,
"max": null
},
"numberFormat": {
"format": null,
"precision": null
},
"title": null,
"titleEnabled": true,
"scaleType": "linear"
},
"y2Axis": {
"numberRange": {
"min": null,
"max": null
},
"numberFormat": {
"format": null,
"precision": null
},
"title": null,
"titleEnabled": true,
"scaleType": "linear"
},
"flatTable": {
"general": {
"sizingStyle": "sizeToFit",
"colorScheme": "default",
"density": "standard",
"colGridlines": true,
"rowGridlines": true,
"rowNumbers": false
},
"columns": [{
"id": "[first_name]",
"colType": "STRING"
}, {
"id": "[last_name]",
"colType": "STRING"
}, {
"id": "[payment_date]",
"colType": "TIMESTAMP"
}]
},
"legend": {
"enabled": true,
"position": "auto",
"title": null,
"titleEnabled": true
},
"bigValue": {
"description": null,
"primaryTrendFormat": {
"label": null
}
},
"nullToZero": false,
"showValues": true,
"pieLabelsNotPercentages": false,
"drilldownEnabled": true,
"colors": {
"theme": "e7882faa26e7",
"custom": ["#37B067", " #6296BC", " #EDB40D", " #7FD7C1", " #9F8CAE", " #EB6672", " #376C72", " #EE9DCC", " #E3791A", " #9F765E"],
"map": {}
}
},
"fieldFormats": {
"[first_name]": {
"paneFormat": {
"default": {
"colType": "STRING",
"type": "default",
"horizontalAlignment": "left"
}
},
"axisFormat": {},
"headerFormat": {},
"legendFormat": {}
},
"[last_name]": {
"paneFormat": {
"default": {
"colType": "STRING",
"type": "default",
"horizontalAlignment": "left"
}
},
"axisFormat": {},
"headerFormat": {},
"legendFormat": {}
},
"[payment_date]": {
"paneFormat": {
"default": {
"colType": "TIMESTAMP",
"type": "date",
"horizontalAlignment": "right",
"dateFormat": "hour"
}
},
"axisFormat": {},
"headerFormat": {},
"legendFormat": {}
}
},
"fieldColors": {},
"title": "Customer Payment Information",
"description": "",
"chartType": "table",
"encoding": {
"row": [],
"column": [],
"x": [],
"y": [],
"x2": [],
"y2": [],
"value": [],
"marks": {
"channel": "*",
"series": [],
"properties": {
"type": {
"value": "Text",
"automatic": true
},
"color": {
"values": []
},
"size": {
"values": []
},
"text": {
"values": [{
"id": "a8b5",
"formula": "[first_name]",
"variable": {
"type": "DISCRETE"
}
}, {
"id": "e1c2",
"formula": "[last_name]",
"variable": {
"type": "DISCRETE"
}
}, {
"id": "e991",
"formula": "[payment_date]",
"variable": {
"type": "DISCRETE"
}
}]
},
"label": {
"values": []
},
"detail": {
"values": []
},
"angle": {
"values": []
},
"tooltip": {
"values": []
}
}
},
"sorts": [{
"source": "[first_name]",
"byField": {
"id": "a8b5",
"formula": "[first_name]",
"variable": {
"type": "DISCRETE"
}
},
"type": "DATA",
"descending": false
}],
"filter": [{
"id": "2234",
"formula": "[last_name]",
"variable": {
"type": "DISCRETE"
},
"selection": {
"selected": ["Alexander", "Allard", "Arce", "Archuleta", "Artis"],
"mode": "INCLUDE"
},
"filterType": "DISCRETE"
}],
"totals": {
"grand": {
"row": false,
"column": false
}
}
}
},
"token": "bce2fc7d4910",
"created_at": "2021-12-06T21:52:56.804Z",
"updated_at": "2021-12-07T19:07:29.708Z",
"color_palette_token": "e7882faa26e7",
"switch_view_token": "bce5e808453c",
"_links": {
"self": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts/bce2fc7d4910"
},
"color_palette": {
"href": "/api/acryl/color_palettes/e7882faa26e7"
},
"report_viz_web": {
"href": "/acryl/reports/72f2ef8fb3a8/viz/bce2fc7d4910"
},
"preview": {
"templated": true,
"href": "/api/acryl/reports/72f2ef8fb3a8/runs/{token}/charts/bce2fc7d4910/preview"
},
"report_run_viz_web": {
"templated": true,
"href": "/acryl/reports/72f2ef8fb3a8/runs/{token}/viz/bce2fc7d4910"
},
"creator": {
"href": "/api/modeuser"
}
},
"_forms": {
"create": {
"method": "post",
"action": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts",
"input": {
"chart": {
"view": {
"type": "text",
"value": "{}"
},
"view_vegas": {
"type": "text",
"value": "{}"
},
"view_version": {
"type": "select",
"options": ["1", "2"],
"value": 1
},
"token": {
"type": "text",
"value": ""
},
"color_palette_token": {
"type": "text"
}
}
}
},
"edit": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts/bce2fc7d4910",
"input": {
"chart": {
"view": {
"type": "text",
"value": "{}"
},
"view_vegas": {
"type": "text",
"value": "{}"
},
"view_version": {
"type": "select",
"options": ["1", "2"],
"value": 1
},
"color_palette_token": {
"type": "text",
"value": "e7882faa26e7"
}
}
}
},
"destroy": {
"method": "delete",
"action": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts/bce2fc7d4910"
}
}
}]
}
}

View File

@ -0,0 +1,100 @@
{
"_links": {
"self": {
"href": "/api/acryl/data_sources"
}
},
"_embedded": {
"data_sources": [{
"id": 34337,
"name": "PostgreSQL",
"description": "",
"token": "3c9b417f20d4",
"adapter": "jdbc:postgresql",
"created_at": "2021-11-30T19:49:59.884Z",
"updated_at": "2021-11-30T19:49:59.884Z",
"has_expensive_schema_updates": false,
"public": false,
"asleep": false,
"queryable": true,
"display_name": "PostgreSQL",
"account_id": 745241,
"account_username": "acryl",
"organization_token": "d82b886a1cb7",
"default": true,
"default_for_organization_id": "d82b886a1cb7",
"database": "dvdrental",
"host": "72.38.17.64",
"port": 5432,
"ssl": true,
"username": "postgres",
"provider": "default",
"vendor": "postgresql",
"ldap": false,
"warehouse": null,
"bridged": false,
"adapter_version": "42.2.23",
"custom_attributes": {
"instance_name": ""
},
"default_access_level": "Query",
"_links": {
"self": {
"href": "/api/acryl/data_sources/3c9b417f20d4"
},
"account": {
"href": "/api/acryl"
},
"permissions": {
"href": "/api/acryl/data_sources/3c9b417f20d4/permissions"
},
"web": {
"href": "/organizations/acryl/data_sources/3c9b417f20d4"
},
"web_home": {
"href": "/home/acryl/data_sources/3c9b417f20d4"
}
},
"_forms": {
"edit": {
"method": "patch",
"action": "/api/acryl/data_sources/3c9b417f20d4",
"content_type": "application/json",
"input": {
"data_source": {
"tables[]": {
"name": {
"type": "text"
},
"description": {
"type": "text"
},
"schema": {
"type": "text"
},
"columns[]": {
"name": {
"type": "text"
},
"data_type": {
"type": "text"
},
"is_nullable": {
"type": "select",
"options": [true, false],
"value": true
},
"primary_key": {
"type": "select",
"options": [true, false],
"value": false
}
}
}
}
}
}
}
}]
}
}

View File

@ -0,0 +1,76 @@
{
"_links": {
"self": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries"
}
},
"_embedded": {
"queries": [{
"id": 10104033,
"token": "bc5f397e4b77",
"raw_query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date\\nFROM\\n\\tcustomer\\nINNER JOIN public.payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date;\\n",
"created_at": "2021-12-06T21:52:01.072Z",
"updated_at": "2021-12-06T21:53:38.917Z",
"name": "Customer Payment",
"last_run_id": 1888523732,
"data_source_id": 34337,
"explorations_count": 0,
"_links": {
"self": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77"
},
"report": {
"href": "/api/acryl/reports/72f2ef8fb3a8"
},
"report_runs": {
"href": "/api/acryl/reports/72f2ef8fb3a8/runs"
},
"charts": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts"
},
"new_chart": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts/new"
},
"new_query_table": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/tables/new"
},
"query_tables": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/tables"
},
"query_runs": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/runs"
},
"creator": {
"href": "/api/modeuser"
}
},
"_forms": {
"edit": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77",
"content_type": "application/json",
"input": {
"query": {
"raw_query": {
"type": "text",
"value": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date\\nFROM\\n\\tcustomer\\nINNER JOIN public.payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date;\\n"
},
"name": {
"type": "text",
"value": "Customer Payment"
},
"data_source_id": {
"type": "text",
"value": 34337
}
}
}
},
"destroy": {
"method": "delete",
"action": "/api/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77"
}
}
}]
}
}

View File

@ -0,0 +1,212 @@
{
"_links": {
"self": {
"href": "/api/acryl/collections/9026edbd5a3c/reports"
}
},
"_embedded": {
"reports": [{
"token": "72f2ef8fb3a8",
"id": 2910167,
"name": "Actor Report",
"description": "Actor Report Description",
"created_at": "2021-11-30T19:52:16.996Z",
"updated_at": "2021-12-06T21:52:42.433Z",
"published_at": null,
"edited_at": "2021-12-08T03:51:18.665Z",
"theme_id": 3,
"color_mappings": {},
"type": "Report",
"last_successful_sync_at": null,
"last_saved_at": "2021-12-08T03:51:18.670Z",
"archived": false,
"space_token": "9026edbd5a3c",
"account_id": 745241,
"account_username": "acryl",
"public": false,
"full_width": false,
"manual_run_disabled": false,
"run_privately": true,
"drilldowns_enabled": true,
"is_embedded": false,
"is_signed": false,
"shared": false,
"expected_runtime": 1.631274,
"last_successfully_run_at": "2021-12-06T21:52:42.224Z",
"last_run_at": "2021-12-06T21:52:40.370Z",
"web_preview_image": "https://s3.us-west-2.amazonaws.com/mode.production/report-run-previews/web/acryl/72f2ef8fb3a8/7a793aa00167/7216e5a24022?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAJZYSJ2XHDM2CQN2Q%2F20211208%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20211208T035549Z&X-Amz-Expires=119&X-Amz-SignedHeaders=host&X-Amz-Signature=f4590b8af4c5f4904be52fed8f0f2cb52c53ff11bbfe0e000fc203fbc6d80152",
"last_successful_run_token": "7a793aa00167",
"flamingo_signature": "c2lnbmF0dXJlPTZiMjYzMGFkMjgxNDA3MWIxMWE5YzA5M2QzZDM2YzU4NmFiNjdhZTBmODg4NmU1ZjlhZDM2MzJmMjM2MDMwNTAmb3JnYW5pemF0aW9uX3Rva2VuPWQ4MmI4ODZhMWNiNyZzaGFyZWRfa2V5X2lkPWZsYW1pbmdvLXJlYWQtMSZ0YWdfZXhwcmVzc2lvbj1PX2Q4MmI4ODZhMWNiNyslMjYlMjYrUl83MmYyZWY4ZmIzYTgmdGltZXN0YW1wPTE2Mzg5MzU3NDkmdXNlcl90b2tlbj04NTE1NzIyODA5OTM=",
"query_count": 1,
"max_query_count": 160,
"chart_count": 1,
"runs_count": 8,
"schedules_count": 0,
"query_preview": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date\\nFROM\\n\\tcustomer\\nINNER JOIN public.payment \\n",
"view_count": 49,
"_links": {
"self": {
"href": "/api/acryl/reports/72f2ef8fb3a8"
},
"web": {
"href": "https://app.mode.com/acryl/reports/72f2ef8fb3a8"
},
"web_edit": {
"href": "/editor/acryl/reports/72f2ef8fb3a8"
},
"share": {
"href": "/acryl/reports/72f2ef8fb3a8"
},
"web_report_runs": {
"href": "/acryl/reports/72f2ef8fb3a8/runs"
},
"account": {
"href": "/api/acryl"
},
"report_run": {
"templated": true,
"href": "/api/acryl/reports/72f2ef8fb3a8/runs/{id}?embed[result]=1"
},
"space": {
"href": "/api/acryl/collections/9026edbd5a3c"
},
"queries": {
"href": "/api/acryl/reports/72f2ef8fb3a8/queries"
},
"report_runs": {
"href": "/api/acryl/reports/72f2ef8fb3a8/runs"
},
"report_pins": {
"href": "/api/acryl/reports/72f2ef8fb3a8/pins"
},
"report_schedules": {
"href": "/api/acryl/reports/72f2ef8fb3a8/schedules"
},
"python_visualizations": {
"href": "/api/acryl/reports/72f2ef8fb3a8/layout_cells"
},
"last_run": {
"href": "/api/acryl/reports/72f2ef8fb3a8/runs/7a793aa00167"
},
"last_successful_run": {
"href": "/api/acryl/reports/72f2ef8fb3a8/runs/7a793aa00167"
},
"python_notebook": {
"href": "/api/acryl/reports/72f2ef8fb3a8/notebooks/81800b084768"
},
"perspective_email_subscription_memberships": {
"href": "/api/acryl/reports/72f2ef8fb3a8/perspective_email_report_subscription_memberships"
},
"validate_email_subscriber": {
"templated": true,
"href": "/api/acryl/reports/72f2ef8fb3a8/email_report_subscribers/validate{?subscriber[email]}"
},
"creator": {
"href": "/api/modeuser"
},
"report_theme": {
"href": "/api/modeanalytics/report_themes/3818837139b7"
},
"report_index_web": {
"href": "/acryl/spaces/9026edbd5a3c/reports/72f2ef8fb3a8"
}
},
"_forms": {
"edit": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8",
"input": {
"report": {
"name": {
"type": "text",
"value": "Actor Report"
},
"description": {
"type": "text",
"value": "Actor Report Description"
},
"account_id": {
"type": "text",
"value": 745241
},
"space_token": {
"type": "text",
"value": "9026edbd5a3c"
},
"color_mappings": {
"type": "text",
"value": {}
},
"published": {
"type": "text",
"value": false
}
}
}
},
"destroy": {
"method": "delete",
"action": "/api/acryl/reports/72f2ef8fb3a8"
},
"archive": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8/archive"
},
"unarchive": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8/unarchive"
},
"batch_edit_filters": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8/filters/update_batch",
"input": {
"report": {
"filters[]": {
"token": {
"type": "text"
},
"options": {
"type": "text"
}
}
}
}
},
"clone": {
"method": "post",
"action": "/acryl/reports/72f2ef8fb3a8/runs/7a793aa00167/clone",
"input": {
"owner_id": {
"type": "text",
"value": 745241
}
}
},
"update_settings": {
"method": "patch",
"action": "/api/acryl/reports/72f2ef8fb3a8/update_settings",
"input": {
"report": {
"theme": {
"type": "select",
"options": [
["Mode Dark", 1],
["Mode Grey", 2],
["Mode Light", 3],
["Mode Subway", 5]
],
"value": 3
},
"full_width": {
"type": "select",
"options": [true, false],
"value": false
}
}
}
}
}
}]
}
}

View File

@ -0,0 +1,60 @@
{
"_links": {
"self": {
"href": "/api/acryl/collections"
}
},
"_embedded": {
"spaces": [{
"token": "9026edbd5a3c",
"id": 1905171,
"space_type": "custom",
"name": "Petabloc",
"description": null,
"state": "active",
"restricted": false,
"free_default": true,
"viewable?": true,
"viewed?": true,
"default_access_level": "edit",
"_links": {
"self": {
"href": "/api/acryl/collections/9026edbd5a3c"
},
"detail": {
"href": "/api/acryl/collections/9026edbd5a3c/detail"
},
"web": {
"href": "/acryl/spaces/9026edbd5a3c"
},
"reports": {
"href": "/api/acryl/collections/9026edbd5a3c/reports"
},
"creator": {
"href": "/api/modeuser"
},
"user_space_membership": {
"href": "/api/acryl/collections/9026edbd5a3c/memberships/0a48a4a59f55"
},
"space_memberships": {
"href": "/api/acryl/collections/9026edbd5a3c/memberships"
},
"preview_space_memberships": {
"href": "/api/acryl/collections/9026edbd5a3c/memberships/preview"
},
"search_space_permissions": {
"href": "/api/acryl/collections/9026edbd5a3c/permissions/search"
},
"viewed": {
"href": "/api/acryl/collections/9026edbd5a3c/viewed"
}
},
"_forms": {
"destroy_entitlements": {
"method": "delete",
"action": "/api/acryl/collections/9026edbd5a3c/permissions"
}
}
}]
}
}

View File

@ -0,0 +1,156 @@
{
"username": "modeuser",
"name": "Jawad",
"id": 745240,
"token": "851572280993",
"email": "jqureshi@acryl.com",
"dataset_size_limit_mb": 10,
"query_run_size_limit_mb": 1000,
"email_verified": true,
"avatar": {
"initials": "JA",
"seed": "851572280993",
"color_class": "mode-avatar-color-8",
"type": "initials"
},
"user": true,
"data_source_count": 0,
"organizations_count": 1,
"created_at": "2021-11-29T19:32:33.401Z",
"_links": {
"self": {
"href": "/api/modeuser"
},
"web": {
"href": "/modeuser"
},
"web_settings": {
"href": "/settings"
},
"web_public_datasource_home": {
"href": "/home/modeuser/data_sources/0a671f65888c"
},
"web_spaces": {
"href": "/modeuser/spaces"
},
"web_groups": {
"href": "/organizations/modeuser/groups"
},
"web_new_organization": {
"href": "/organizations/new"
},
"data_sources": {
"href": "/api/modeuser/data_sources"
},
"data_source": {
"templated": true,
"href": "/api/modeuser/data_sources{/id}"
},
"home_web": {
"href": "/home/modeuser"
},
"home_starred_web": {
"href": "/home/modeuser/starred"
},
"home_explorations_web": {
"href": "/home/modeuser/explorations"
},
"home_search_web": {
"href": "/home/modeuser/search"
},
"new_invite_web": {
"href": "/modeuser/invites/new"
},
"new_upload_web": {
"href": "/modeuser/uploads/new"
},
"public_data_sources": {
"href": "/api/data_sources"
},
"organizations": {
"href": "/api/modeuser/organizations"
},
"preference": {
"href": "/api/modeuser/preferences"
},
"table": {
"templated": true,
"href": "/api/modeuser/tables{/id}"
},
"report": {
"templated": true,
"href": "/api/modeuser/reports{/id}"
},
"reports": {
"href": "/api/modeuser/reports"
},
"archived_reports": {
"href": "/api/modeuser/reports?filter=archived"
},
"public_reports": {
"href": "/api/modeuser/reports?filter=public"
},
"drafts_reports": {
"href": "/api/modeuser/reports?filter=drafts"
},
"starred_reports": {
"href": "/api/modeuser/reports?filter=starred"
},
"by_ids_reports": {
"templated": true,
"href": "/api/modeuser/reports?filter=by_ids&ids={ids}"
},
"viewed_reports": {
"href": "/api/modeuser/reports?filter=viewed"
},
"by_tokens_definitions": {
"templated": true,
"href": "/api/modeuser/definitions?filter=by_tokens&tokens={tokens}"
},
"bridges": {
"href": "/api/modeuser/bridges"
},
"access_tokens": {
"href": "/api/modeuser/access_tokens"
},
"new_report": {
"href": "/api/modeuser/reports/new"
},
"new_report_web": {
"href": "/editor/modeuser/reports/new"
},
"validate_table": {
"href": "/api/table/validate"
},
"report_views": {
"href": "/api/modeuser/report_views"
},
"spaces": {
"href": "/api/modeuser/collections"
},
"space": {
"templated": true,
"href": "/api/modeuser/collections{/id}"
},
"custom_spaces": {
"href": "/api/modeuser/collections?filter=custom"
},
"move_to_spaces": {
"href": "/api/modeuser/collections?filter=editable"
},
"definitions": {
"href": "/api/modeuser/definitions"
},
"definition": {
"templated": true,
"href": "/api/modeuser/definitions{/id}"
},
"all_color_palettes": {
"href": "/api/color_palettes/predefined"
},
"validate_definition_name": {
"templated": true,
"href": "/api/modeuser/definitions/validate{?definition[name]}"
}
}
}

View File

@ -0,0 +1,146 @@
import json
from unittest.mock import patch
from freezegun import freeze_time
from requests.models import HTTPError
from datahub.configuration.common import PipelineExecutionError
from datahub.ingestion.run.pipeline import Pipeline
from tests.test_helpers import mce_helpers
# Instant handed to freezegun's ``freeze_time`` decorator on both tests.
FROZEN_TIME = "2021-12-07 07:00:00"

# Common prefix of every mocked Mode REST endpoint below.
_MODE_API_ROOT = "https://app.mode.com/api"

# Request URL -> name of the canned JSON response file that
# ``MockResponse.get`` loads from the ``setup`` directory.
JSON_RESPONSE_MAP = {
    f"{_MODE_API_ROOT}/account": "user.json",
    f"{_MODE_API_ROOT}/acryl/spaces": "spaces.json",
    f"{_MODE_API_ROOT}/acryl/spaces/9026edbd5a3c/reports": "reports.json",
    f"{_MODE_API_ROOT}/modeuser": "user.json",
    f"{_MODE_API_ROOT}/acryl/reports/72f2ef8fb3a8/queries": "queries.json",
    f"{_MODE_API_ROOT}/acryl/reports/72f2ef8fb3a8/queries/bc5f397e4b77/charts": "charts.json",
    f"{_MODE_API_ROOT}/acryl/data_sources": "data_sources.json",
}

# URLs for which ``MockResponse.raise_for_status`` raises an HTTPError when
# this list is supplied as the mock's ``error_list``.
RESPONSE_ERROR_LIST = [f"{_MODE_API_ROOT}/acryl/spaces/9026edbd5a3c/reports"]

# Directory holding this test's resources; each test assigns it before the
# pipeline runs, and ``MockResponse.get`` reads it to locate fixtures.
test_resources_dir = None
class MockResponse:
    """Stand-in for both the HTTP session and the responses it returns.

    The tests patch ``requests.session`` so that it yields an instance of
    this class. ``get`` loads a canned JSON file for the requested URL and
    returns ``self``, so the same object then answers ``json()`` and
    ``raise_for_status()`` for that request.
    """

    def __init__(self, error_list, status_code):
        """Create the mock.

        Args:
            error_list: URLs for which ``raise_for_status`` must raise, or
                ``None`` to never raise.
            status_code: Value exposed as ``status_code``.
        """
        self.json_data = None
        self.error_list = error_list
        self.status_code = status_code
        # Plain attributes the code under test may assign on a session.
        self.auth = None
        self.headers = {}
        self.url = None

    def json(self):
        """Return the payload loaded by the most recent ``get`` call."""
        return self.json_data

    def get(self, url):
        """Load the canned response registered for ``url`` and return ``self``.

        Raises:
            FileNotFoundError: If ``url`` has no entry in
                ``JSON_RESPONSE_MAP`` or the mapped fixture file is missing.
        """
        self.url = url
        fixture_name = JSON_RESPONSE_MAP.get(url)
        if fixture_name is None:
            # Fail with the offending URL instead of trying to open a file
            # literally named "None", which hides what was requested.
            raise FileNotFoundError(f"No mock fixture registered for URL: {url}")
        response_json_path = f"{test_resources_dir}/setup/{fixture_name}"
        with open(response_json_path, encoding="utf-8") as file:
            self.json_data = json.load(file)
        return self

    def raise_for_status(self):
        """Raise ``HTTPError`` if the last requested URL is in ``error_list``."""
        if self.error_list is None or self.url not in self.error_list:
            return
        http_error_msg = f"400 Client Error: Simulate error for url: {self.url}"
        raise HTTPError(http_error_msg, response=self)
def mocked_requests_sucess(*args, **kwargs):
    """Patch side effect: build a mock session that never raises an HTTP error.

    Positional and keyword arguments are accepted and ignored.
    """
    return MockResponse(error_list=None, status_code=200)
def mocked_requests_failure(*args, **kwargs):
    """Patch side effect: build a mock session that fails for listed URLs.

    The mock raises from ``raise_for_status`` for every URL in
    ``RESPONSE_ERROR_LIST``. Positional and keyword arguments are accepted
    and ignored.
    """
    return MockResponse(error_list=RESPONSE_ERROR_LIST, status_code=200)
@freeze_time(FROZEN_TIME)
def test_mode_ingest_success(pytestconfig, tmp_path):
    """Ingest from the mocked Mode API and compare the output to the golden file.

    ``requests.session`` inside the mode source is patched to return a
    ``MockResponse`` that serves canned JSON, the pipeline writes to a file
    sink under ``tmp_path``, and the result is checked against
    ``mode_mces_golden.json`` with ``mce_helpers.IGNORE_PATH_TIMESTAMPS``
    passed as the ignore list.
    """
    global test_resources_dir

    output_file = f"{tmp_path}/mode_mces.json"
    recipe = {
        "run_id": "mode-test",
        "source": {
            "type": "mode",
            "config": {
                "token": "xxxx",
                "password": "xxxx",
                "connect_uri": "https://app.mode.com/",
                "workspace": "acryl",
            },
        },
        "sink": {
            "type": "file",
            "config": {
                "filename": output_file,
            },
        },
    }

    session_patch = patch(
        "datahub.ingestion.source.mode.requests.session",
        side_effect=mocked_requests_sucess,
    )
    with session_patch:
        # MockResponse.get resolves fixture files relative to this directory.
        test_resources_dir = pytestconfig.rootpath / "tests/integration/mode"

        pipeline = Pipeline.create(recipe)
        pipeline.run()
        pipeline.raise_from_status()

        mce_helpers.check_golden_file(
            pytestconfig,
            output_path=output_file,
            golden_path=test_resources_dir / "mode_mces_golden.json",
            ignore_paths=mce_helpers.IGNORE_PATH_TIMESTAMPS,
        )
@freeze_time(FROZEN_TIME)
def test_mode_ingest_failure(pytestconfig, tmp_path):
    """Check that a failing reports request is surfaced as a source failure.

    ``requests.session`` inside the mode source is patched to return a
    ``MockResponse`` whose ``raise_for_status`` raises for the URLs in
    ``RESPONSE_ERROR_LIST``. The pipeline is then expected to raise
    ``PipelineExecutionError`` from ``raise_from_status`` with exactly one
    recorded failure, keyed ``mode-report-9026edbd5a3c``.
    """
    global test_resources_dir

    with patch(
        "datahub.ingestion.source.mode.requests.session",
        side_effect=mocked_requests_failure,
    ):
        # MockResponse.get resolves fixture files relative to this directory.
        test_resources_dir = pytestconfig.rootpath / "tests/integration/mode"

        pipeline = Pipeline.create(
            {
                "run_id": "mode-test",
                "source": {
                    "type": "mode",
                    "config": {
                        "token": "xxxx",
                        "password": "xxxx",
                        "connect_uri": "https://app.mode.com/",
                        "workspace": "acryl",
                    },
                },
                "sink": {
                    "type": "file",
                    "config": {
                        "filename": f"{tmp_path}/mode_mces.json",
                    },
                },
            }
        )
        pipeline.run()

        try:
            pipeline.raise_from_status()
        except PipelineExecutionError as exec_error:
            assert exec_error.args[0] == "Source reported errors"
            failures = exec_error.args[1].failures
            assert len(failures) == 1
            assert list(failures.keys())[0] == "mode-report-9026edbd5a3c"
        else:
            # Without this branch the test would pass vacuously when the
            # simulated HTTP error is not reported by the source.
            raise AssertionError(
                "Expected PipelineExecutionError because the reports request fails"
            )

View File

@ -117,6 +117,16 @@
"logoUrl": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/kustologo.png"
}
},
{
"urn": "urn:li:dataPlatform:mode",
"aspect": {
"datasetNameDelimiter": ".",
"name": "mode",
"displayName": "Mode",
"type": "KEY_VALUE_STORE",
"logoUrl": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/modelogo.png"
}
},
{
"urn": "urn:li:dataPlatform:mongodb",
"aspect": {