2022-01-21 09:35:42 -06:00
|
|
|
import errno
|
2021-12-07 08:57:12 -08:00
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
import os
|
2022-01-12 21:47:42 -08:00
|
|
|
import platform
|
2021-12-07 08:57:12 -08:00
|
|
|
import uuid
|
|
|
|
from functools import wraps
|
|
|
|
from pathlib import Path
|
2022-02-24 15:35:48 -05:00
|
|
|
from typing import Any, Callable, Dict, Optional, TypeVar
|
2021-12-07 08:57:12 -08:00
|
|
|
|
2022-02-24 15:35:48 -05:00
|
|
|
from mixpanel import Consumer, Mixpanel
|
2021-12-07 08:57:12 -08:00
|
|
|
|
|
|
|
import datahub as datahub_package
|
2022-07-15 14:15:24 +05:30
|
|
|
from datahub.cli.cli_utils import DATAHUB_ROOT_FOLDER
|
2022-05-27 19:04:45 -05:00
|
|
|
from datahub.ingestion.graph.client import DataHubGraph
|
2021-12-07 08:57:12 -08:00
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# DATAHUB_ROOT_FOLDER (from the CLI utilities) wrapped as a Path.
DATAHUB_FOLDER = Path(DATAHUB_ROOT_FOLDER)

# JSON file in which the telemetry client ID and enabled flag are persisted.
CONFIG_FILE = DATAHUB_FOLDER.joinpath("telemetry-config.json")
|
|
|
|
|
|
|
|
# Environment-level telemetry switch. It is AND-ed with the value stored in the
# config file, and it is also what applies when the config file is not found.
# Only the (case-insensitive) value "true" enables telemetry.
ENV_ENABLED = os.environ.get("DATAHUB_TELEMETRY_ENABLED", "true").lower() == "true"

# Names whose mere presence in the environment is treated as "running in CI".
# see
# https://adamj.eu/tech/2020/03/09/detect-if-your-tests-are-running-on-ci/
# https://github.com/watson/ci-info
CI_ENV_VARS = {
    "APPCENTER",
    "APPCIRCLE",
    # NOTE: a former "APPCIRCLEAZURE_PIPELINES" entry was dropped; it was the
    # accidental concatenation of the two neighbouring names, both listed here.
    "APPVEYOR",
    "AZURE_PIPELINES",
    "BAMBOO",
    "BITBUCKET",
    "BITRISE",
    "BUDDY",
    "BUILDKITE",
    "BUILD_ID",
    "CI",
    "CIRCLE",
    "CIRCLECI",
    "CIRRUS",
    "CIRRUS_CI",
    "CI_NAME",
    "CODEBUILD",
    "CODEBUILD_BUILD_ID",
    "CODEFRESH",
    "CODESHIP",
    "CYPRESS_HOST",
    "DRONE",
    "DSARI",
    "EAS_BUILD",
    "GITHUB_ACTIONS",
    "GITLAB",
    "GITLAB_CI",
    "GOCD",
    "HEROKU_TEST_RUN_ID",
    "HUDSON",
    "JENKINS",
    "JENKINS_URL",
    "LAYERCI",
    "MAGNUM",
    "NETLIFY",
    "NEVERCODE",
    "RENDER",
    "SAIL",
    "SCREWDRIVER",
    "SEMAPHORE",
    "SHIPPABLE",
    "SOLANO",
    "STRIDER",
    "TASKCLUSTER",
    "TEAMCITY",
    "TEAMCITY_VERSION",
    "TF_BUILD",
    "TRAVIS",
    "VERCEL",
    "WERCKER_ROOT",
    "bamboo.buildKey",
}

# disable when running in any CI
if any(var in os.environ for var in CI_ENV_VARS):
    ENV_ENABLED = False

# Request timeout handed to the Mixpanel consumer. A malformed
# DATAHUB_TELEMETRY_TIMEOUT must not make importing this module fail, so any
# non-integer value falls back to the default of 10.
try:
    TIMEOUT = int(os.environ.get("DATAHUB_TELEMETRY_TIMEOUT", "10"))
except ValueError:
    TIMEOUT = 10

# Mixpanel project token used for every event sent by this module.
MIXPANEL_TOKEN = "5ee83d940754d63cacbf7d34daa6f44a"
|
|
|
|
|
2021-12-07 08:57:12 -08:00
|
|
|
|
|
|
|
class Telemetry:
    """
    Usage-telemetry client that reports events to Mixpanel.

    The client ID and the enabled flag are persisted as JSON in CONFIG_FILE.
    Errors raised while reading/writing that file or while talking to Mixpanel
    are logged at debug level and swallowed by the methods below.
    """

    # Identifier sent with every event; loaded from CONFIG_FILE or generated.
    client_id: str
    # Whether events are sent at all (config value combined with ENV_ENABLED).
    enabled: bool = True
    # Set once init_tracking() has attempted to send the user-level properties.
    tracking_init: bool = False

    def __init__(self) -> None:
        # try loading the config if it exists, update it if that fails
        if not CONFIG_FILE.exists() or not self.load_config():
            # set up defaults
            self.client_id = str(uuid.uuid4())
            self.enabled = self.enabled and ENV_ENABLED
            if not self.update_config():
                # If we're not able to persist the client ID, we should default
                # to a standardized value. This prevents us from minting a new
                # client ID every time we start the CLI.
                self.client_id = "00000000-0000-0000-0000-000000000001"

        # The Mixpanel client is only created when telemetry is enabled; it
        # stays None otherwise (or when construction fails), which turns
        # init_tracking() and ping() into no-ops.
        self.mp = None
        if self.enabled:
            try:
                self.mp = Mixpanel(
                    MIXPANEL_TOKEN, consumer=Consumer(request_timeout=int(TIMEOUT))
                )
            except Exception as e:
                logger.debug(f"Error connecting to mixpanel: {e}")

    def update_config(self) -> bool:
        """
        Update the config file with the current client ID and enabled status.

        Return True if the update succeeded, False otherwise
        """
        logger.debug("Updating telemetry config")

        try:
            os.makedirs(DATAHUB_FOLDER, exist_ok=True)
            try:
                with open(CONFIG_FILE, "w") as f:
                    json.dump(
                        {"client_id": self.client_id, "enabled": self.enabled},
                        f,
                        indent=2,
                    )
                return True
            except IOError as x:
                if x.errno == errno.ENOENT:
                    logger.debug(
                        f"{CONFIG_FILE} does not exist and could not be created. Please check permissions on the parent folder."
                    )
                elif x.errno == errno.EACCES:
                    # This path is a write, so report it as such.
                    logger.debug(
                        f"{CONFIG_FILE} cannot be written. Please check the permissions on this file."
                    )
                else:
                    logger.debug(
                        f"{CONFIG_FILE} had an IOError, please inspect this file for issues."
                    )
        except Exception as e:
            # Covers failures creating the folder and non-IO errors from dumping.
            logger.debug(f"Failed to update config file at {CONFIG_FILE} due to {e}")

        return False

    def enable(self) -> None:
        """
        Enable telemetry and persist the choice to the config file.
        """
        self.enabled = True
        self.update_config()

    def disable(self) -> None:
        """
        Disable telemetry and persist the choice to the config file.
        """
        self.enabled = False
        self.update_config()

    def load_config(self) -> bool:
        """
        Load the saved config for the telemetry client ID and enabled status.

        Returns True if config was correctly loaded, False otherwise.
        """
        try:
            with open(CONFIG_FILE, "r") as f:
                config = json.load(f)
                self.client_id = config["client_id"]
                # The stored flag can only narrow the environment-level switch.
                self.enabled = config["enabled"] & ENV_ENABLED
                return True
        except IOError as x:
            if x.errno == errno.ENOENT:
                logger.debug(
                    f"{CONFIG_FILE} does not exist and could not be created. Please check permissions on the parent folder."
                )
            elif x.errno == errno.EACCES:
                logger.debug(
                    f"{CONFIG_FILE} cannot be read. Please check the permissions on this file."
                )
            else:
                logger.debug(
                    f"{CONFIG_FILE} had an IOError, please inspect this file for issues."
                )
        except Exception as e:
            # e.g. invalid JSON or a missing key
            logger.debug(f"Failed to load {CONFIG_FILE} due to {e}")

        return False

    def init_tracking(self) -> None:
        """
        Send the user-level properties (DataHub version, OS, Python version).

        Does nothing when telemetry is disabled, no Mixpanel client exists, or
        the properties were already sent (or attempted) by this instance.
        """
        if not self.enabled or self.mp is None or self.tracking_init is True:
            return

        logger.debug("Sending init Telemetry")
        try:
            self.mp.people_set(
                self.client_id,
                {
                    "datahub_version": datahub_package.nice_version_name(),
                    "os": platform.system(),
                    "python_version": platform.python_version(),
                },
            )
        except Exception as e:
            logger.debug(f"Error reporting telemetry: {e}")
        # Must be the same attribute the guard above reads; assigning any other
        # name would leave the guard permanently open and resend on every call.
        self.tracking_init = True

    def ping(
        self,
        event_name: str,
        properties: Optional[Dict[str, Any]] = None,
        server: Optional[DataHubGraph] = None,
    ) -> None:
        """
        Send a single telemetry event.

        Args:
            event_name (str): name of the event to send.
            properties (Optional[Dict[str, Any]]): metadata for the event. The
                mapping is copied, so the caller's dict is left unmodified.
            server (Optional[DataHubGraph]): server whose type, version and id
                are attached to the event; "n/a" placeholders are used if None.
        """
        if not self.enabled or self.mp is None:
            return

        # send event
        try:
            logger.debug("Sending Telemetry")
            # Build a fresh dict per event: no shared default argument and no
            # mutation of the dict passed in by the caller.
            event_properties: Dict[str, Any] = {
                **(properties or {}),
                **self._server_props(server),
            }
            self.mp.track(self.client_id, event_name, event_properties)
        except Exception as e:
            logger.debug(f"Error reporting telemetry: {e}")

    def _server_props(self, server: Optional[DataHubGraph]) -> Dict[str, str]:
        """
        Build the server-related event properties.

        Returns "n/a" for every field when no server is given; fields absent
        from the server's config are reported as "missing".
        """
        if not server:
            return {
                "server_type": "n/a",
                "server_version": "n/a",
                "server_id": "n/a",
            }

        return {
            "server_type": server.server_config.get("datahub", {}).get(
                "serverType", "missing"
            ),
            "server_version": server.server_config.get("versions", {})
            .get("linkedin/datahub", {})
            .get("version", "missing"),
            "server_id": server.server_id or "missing",
        }
|
|
|
|
|
2021-12-07 08:57:12 -08:00
|
|
|
|
|
|
|
# Generic return type used by the with_telemetry decorator's signature.
T = TypeVar("T")

# Shared Telemetry object, created once when this module is imported.
telemetry_instance = Telemetry()
|
|
|
|
|
|
|
|
|
2022-07-13 11:24:18 +02:00
|
|
|
def suppress_telemetry() -> Any:
    """Turn telemetry off on the shared instance without writing the config file."""
    currently_enabled = bool(telemetry_instance.enabled)
    if currently_enabled:
        # Only worth a log line when this call actually changes something.
        logger.debug("Disabling telemetry locally due to server config")
    telemetry_instance.enabled = False
|
2022-03-14 21:20:29 +05:30
|
|
|
|
|
|
|
|
2022-02-05 00:11:04 -05:00
|
|
|
def get_full_class_name(obj):
    """
    Return the class name of ``obj``, prefixed with its module when applicable.

    Classes from the builtins module (or with no module) yield the bare class
    name; every other class yields ``"<module>.<ClassName>"``.
    """
    cls = obj.__class__
    module_name = cls.__module__
    # str.__class__ is `type`, whose __module__ names the builtins module.
    builtins_module = str.__class__.__module__
    if module_name is not None and module_name != builtins_module:
        return f"{module_name}.{cls.__name__}"
    return cls.__name__
|
2022-02-05 00:11:04 -05:00
|
|
|
|
|
|
|
|
2021-12-07 08:57:12 -08:00
|
|
|
def with_telemetry(func: Callable[..., T]) -> Callable[..., T]:
    """
    Decorator that reports "function-call" telemetry events around ``func``.

    A "start" event is sent before the call, followed by exactly one outcome:
    "completed" (normal return, or SystemExit with a falsy code), "error"
    (SystemExit with a truthy code, or any Exception; the exception's class
    name is attached), or "cancelled" (KeyboardInterrupt). The original
    exception is always re-raised.
    """

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        target = f"{func.__module__}.{func.__name__}"

        def report(status: str, **extra: Any) -> None:
            # Each event gets its own, freshly built properties dict.
            telemetry_instance.ping(
                "function-call", {"function": target, "status": status, **extra}
            )

        telemetry_instance.init_tracking()
        report("start")
        try:
            outcome = func(*args, **kwargs)
            report("completed")
            return outcome
        # System exits (used in ingestion and Docker commands) are not caught
        # by the generic exception handler, so they are handled explicitly.
        except SystemExit as exit_exc:
            if exit_exc.code:
                # Report failed exits
                report("error", error=get_full_class_name(exit_exc))
            else:
                # 0 or None imply success: forward as a normal completion
                report("completed")
            raise exit_exc
        # Catch SIGINTs
        except KeyboardInterrupt as interrupt:
            report("cancelled")
            raise interrupt
        # Catch general exceptions
        except Exception as failure:
            report("error", error=get_full_class_name(failure))
            raise failure

    return wrapper
|