mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-26 01:18:20 +00:00
fix(cli): correct handling of env variables (#5203)
This commit is contained in:
parent
0ee2569d5c
commit
d518b5a085
@ -129,7 +129,10 @@ datahub check plugins
|
||||
## Environment variables supported
|
||||
Environment variables take precedence over the values in the DataHub CLI config created through the `init` command. The supported environment variables are as follows:
|
||||
- `DATAHUB_SKIP_CONFIG` (default `false`) - Set to `true` to skip creating the configuration file.
|
||||
- `DATAHUB_GMS_HOST` (default `http://localhost:8080`) - Set to a URL of GMS instance.
|
||||
- `DATAHUB_GMS_URL` (default `http://localhost:8080`) - Set to the URL of the GMS instance.
|
||||
- `DATAHUB_GMS_HOST` (default `localhost`) - Set to a host of GMS instance. Prefer using `DATAHUB_GMS_URL` to set the URL.
|
||||
- `DATAHUB_GMS_PORT` (default `8080`) - Set to a port of GMS instance. Prefer using `DATAHUB_GMS_URL` to set the URL.
|
||||
- `DATAHUB_GMS_PROTOCOL` (default `http`) - Set to a protocol like `http` or `https`. Prefer using `DATAHUB_GMS_URL` to set the URL.
|
||||
- `DATAHUB_GMS_TOKEN` (default `None`) - Used for communicating with DataHub Cloud.
|
||||
- `DATAHUB_TELEMETRY_ENABLED` (default `true`) - Set to `false` to disable telemetry. If the CLI is run in an environment without access to the public internet, telemetry should be disabled.
|
||||
- `DATAHUB_TELEMETRY_TIMEOUT` (default `10`) - Set to a custom integer value to specify the timeout, in seconds, when sending telemetry.
|
||||
@ -139,7 +142,7 @@ The env variables take precedence over what is in the DataHub CLI config created
|
||||
|
||||
```shell
|
||||
DATAHUB_SKIP_CONFIG=false
|
||||
DATAHUB_GMS_HOST=http://localhost:8080
|
||||
DATAHUB_GMS_URL=http://localhost:8080
|
||||
DATAHUB_GMS_TOKEN=
|
||||
DATAHUB_TELEMETRY_ENABLED=true
|
||||
DATAHUB_TELEMETRY_TIMEOUT=10
|
||||
|
||||
@ -54,7 +54,7 @@ Make sure yaml plugin is installed for your editor:
|
||||
:::
|
||||
|
||||
Since `acryl-datahub` version `>=0.8.33.2`, the default sink is assumed to be a DataHub REST endpoint:
|
||||
- Hosted at "http://localhost:8080" or the environment variable `${DATAHUB_GMS_HOST}` if present
|
||||
- Hosted at "http://localhost:8080" or the environment variable `${DATAHUB_GMS_URL}` if present
|
||||
- With an empty auth token or the environment variable `${DATAHUB_GMS_TOKEN}` if present.
|
||||
|
||||
Here's a simple recipe that pulls metadata from MSSQL (source) and puts it into the default sink (datahub rest).
|
||||
@ -79,7 +79,7 @@ datahub ingest -c recipe.dhub.yaml
|
||||
|
||||
or if you want to override the default endpoints, you can provide the environment variables as part of the command like below:
|
||||
```shell
|
||||
DATAHUB_GMS_HOST="https://my-datahub-server:8080" DATAHUB_GMS_TOKEN="my-datahub-token" datahub ingest -c recipe.dhub.yaml
|
||||
DATAHUB_GMS_URL="https://my-datahub-server:8080" DATAHUB_GMS_TOKEN="my-datahub-token" datahub ingest -c recipe.dhub.yaml
|
||||
```
|
||||
|
||||
A number of recipes are included in the [examples/recipes](./examples/recipes) directory. For full info and context on each source and sink, see the pages described in the [table of plugins](../docs/cli.md#installing-plugins).
|
||||
|
||||
@ -64,7 +64,12 @@ CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
|
||||
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
|
||||
|
||||
ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"
|
||||
ENV_METADATA_HOST_URL = "DATAHUB_GMS_URL"
|
||||
ENV_METADATA_HOST = "DATAHUB_GMS_HOST"
|
||||
ENV_METADATA_PORT = "DATAHUB_GMS_PORT"
|
||||
ENV_METADATA_PROTOCOL = "DATAHUB_GMS_PROTOCOL"
|
||||
ENV_METADATA_HOST_DEPRECATED = "GMS_HOST"
|
||||
ENV_METADATA_PORT_DEPRECATED = "GMS_PORT"
|
||||
ENV_METADATA_TOKEN = "DATAHUB_GMS_TOKEN"
|
||||
|
||||
config_override: Dict = {}
|
||||
@ -79,9 +84,9 @@ class DatahubConfig(BaseModel):
|
||||
gms: GmsConfig
|
||||
|
||||
|
||||
def set_env_variables_override_config(url: str, token: Optional[str]) -> None:
    """Record a GMS URL (and optionally a token) in the module-level override map.

    Intended for use with the REST emitter: the values stored here are kept in
    ``config_override`` under the same keys as the corresponding environment
    variables. The token entry is only written when a token is supplied, so a
    previously stored token is left untouched when ``token`` is ``None``.
    """
    overrides = {ENV_METADATA_HOST_URL: url}
    if token is not None:
        overrides[ENV_METADATA_TOKEN] = token
    config_override.update(overrides)
|
||||
|
||||
@ -135,7 +140,25 @@ def get_details_from_config():
|
||||
|
||||
|
||||
def get_details_from_env() -> Tuple[Optional[str], Optional[str]]:
    """Resolve the GMS URL and access token from environment variables.

    Resolution order for the URL:

    1. If both a host and a port are set (current or deprecated variable
       names), the URL is built as ``{protocol}://{host}:{port}``; the
       protocol defaults to ``http``. This takes precedence over the URL
       variable.
    2. Otherwise the value of the URL variable is used, if set.
    3. Otherwise the host variable is returned as-is, on the assumption that
       it holds a full URL (legacy usage); a warning is logged in that case.

    Returns:
        A ``(url, token)`` tuple; either element may be ``None`` when the
        corresponding variables are not set.
    """
    env = os.environ
    host = env.get(ENV_METADATA_HOST) or env.get(ENV_METADATA_HOST_DEPRECATED)
    port = env.get(ENV_METADATA_PORT) or env.get(ENV_METADATA_PORT_DEPRECATED)
    token = env.get(ENV_METADATA_TOKEN)
    protocol = env.get(ENV_METADATA_PROTOCOL, "http")
    url = env.get(ENV_METADATA_HOST_URL)

    # Only assemble a URL when BOTH parts are present and non-empty. Building
    # it from a port alone would produce "http://None:<port>" and would also
    # shadow a perfectly valid URL variable.
    if host and port:
        return f"{protocol}://{host}:{port}", token

    # The reason for using host as URL is backward compatibility
    # If port is not being used we assume someone is using host env var as URL
    if not url and host:
        log.warning(
            f"Do not use {ENV_METADATA_HOST} as URL. Use {ENV_METADATA_HOST_URL} instead"
        )
    return url or host, token
|
||||
|
||||
|
||||
def first_non_null(ls: List[Optional[str]]) -> Optional[str]:
|
||||
@ -147,10 +170,10 @@ def guess_entity_type(urn: str) -> str:
|
||||
return urn.split(":")[2]
|
||||
|
||||
|
||||
def get_host_and_token():
|
||||
def get_url_and_token():
|
||||
gms_host_env, gms_token_env = get_details_from_env()
|
||||
if len(config_override.keys()) > 0:
|
||||
gms_host = config_override.get(ENV_METADATA_HOST)
|
||||
gms_host = config_override.get(ENV_METADATA_HOST_URL)
|
||||
gms_token = config_override.get(ENV_METADATA_TOKEN)
|
||||
elif should_skip_config():
|
||||
gms_host = gms_host_env
|
||||
@ -164,17 +187,17 @@ def get_host_and_token():
|
||||
|
||||
|
||||
def get_token():
    """Return only the token part of the resolved (url, token) pair."""
    _, gms_token = get_url_and_token()
    return gms_token
|
||||
|
||||
|
||||
def get_session_and_host():
|
||||
session = requests.Session()
|
||||
|
||||
gms_host, gms_token = get_host_and_token()
|
||||
gms_host, gms_token = get_url_and_token()
|
||||
|
||||
if gms_host is None or gms_host.strip() == "":
|
||||
log.error(
|
||||
f"GMS Host is not set. Use datahub init command or set {ENV_METADATA_HOST} env var"
|
||||
f"GMS Host is not set. Use datahub init command or set {ENV_METADATA_HOST_URL} env var"
|
||||
)
|
||||
return None, None
|
||||
|
||||
|
||||
@ -7,6 +7,7 @@ from typing import Any, Dict, Iterable, List, Optional
|
||||
import click
|
||||
from pydantic import root_validator, validator
|
||||
|
||||
from datahub.cli.cli_utils import get_url_and_token
|
||||
from datahub.configuration import config_loader
|
||||
from datahub.configuration.common import (
|
||||
ConfigModel,
|
||||
@ -67,11 +68,12 @@ class PipelineConfig(ConfigModel):
|
||||
@root_validator(pre=True)
|
||||
def default_sink_is_datahub_rest(cls, values: Dict[str, Any]) -> Any:
|
||||
if "sink" not in values:
|
||||
gms_host, gms_token = get_url_and_token()
|
||||
default_sink_config = {
|
||||
"type": "datahub-rest",
|
||||
"config": {
|
||||
"server": "${DATAHUB_GMS_HOST:-http://localhost:8080}",
|
||||
"token": "${DATAHUB_GMS_TOKEN:-}",
|
||||
"server": gms_host,
|
||||
"token": gms_token,
|
||||
},
|
||||
}
|
||||
# resolve env variables if present
|
||||
|
||||
@ -104,7 +104,7 @@ def retrieve_versions( # noqa: C901
|
||||
if not server:
|
||||
try:
|
||||
# let's get the server from the cli config
|
||||
host, token = cli_utils.get_host_and_token()
|
||||
host, token = cli_utils.get_url_and_token()
|
||||
server = DataHubGraph(DatahubClientConfig(server=host, token=token))
|
||||
except Exception as e:
|
||||
log.debug("Failed to get a valid server", e)
|
||||
|
||||
@ -1,3 +1,6 @@
|
||||
import os
|
||||
from unittest import mock
|
||||
|
||||
from datahub.cli import cli_utils
|
||||
|
||||
|
||||
@ -9,3 +12,60 @@ def test_first_non_null():
|
||||
assert cli_utils.first_non_null(["3", "1", "2"]) == "3"
|
||||
assert cli_utils.first_non_null(["", "1", "2"]) == "1"
|
||||
assert cli_utils.first_non_null([" ", "1", "2"]) == "1"
|
||||
|
||||
|
||||
# clear=True isolates the test from any DATAHUB_GMS_* / GMS_* variables that
# happen to be exported in the environment running the test suite.
@mock.patch.dict(
    os.environ, {"DATAHUB_GMS_HOST": "http://localhost:9092"}, clear=True
)
def test_correct_url_when_gms_host_in_old_format():
    """A full URL placed in DATAHUB_GMS_HOST (legacy usage) is returned unchanged."""
    assert cli_utils.get_details_from_env() == ("http://localhost:9092", None)
|
||||
|
||||
|
||||
# clear=True isolates the test from any DATAHUB_GMS_* / GMS_* variables that
# happen to be exported in the environment running the test suite.
@mock.patch.dict(
    os.environ,
    {"DATAHUB_GMS_HOST": "localhost", "DATAHUB_GMS_PORT": "8080"},
    clear=True,
)
def test_correct_url_when_gms_host_and_port_set():
    """Host and port are combined into a URL using the default http protocol."""
    assert cli_utils.get_details_from_env() == ("http://localhost:8080", None)
|
||||
|
||||
|
||||
# clear=True isolates the test from any DATAHUB_GMS_* / GMS_* variables that
# happen to be exported in the environment running the test suite.
@mock.patch.dict(
    os.environ,
    {
        "DATAHUB_GMS_URL": "https://example.com",
        "DATAHUB_GMS_HOST": "localhost",
        "DATAHUB_GMS_PORT": "8080",
    },
    clear=True,
)
def test_correct_url_when_gms_host_port_url_set():
    """Host plus port take precedence over DATAHUB_GMS_URL when all three are set."""
    assert cli_utils.get_details_from_env() == ("http://localhost:8080", None)
|
||||
|
||||
|
||||
# clear=True isolates the test from any DATAHUB_GMS_* / GMS_* variables that
# happen to be exported in the environment running the test suite.
@mock.patch.dict(
    os.environ,
    {
        "DATAHUB_GMS_URL": "https://example.com",
        "DATAHUB_GMS_HOST": "localhost",
        "DATAHUB_GMS_PORT": "8080",
        "DATAHUB_GMS_PROTOCOL": "https",
    },
    clear=True,
)
def test_correct_url_when_gms_host_port_url_protocol_set():
    """DATAHUB_GMS_PROTOCOL replaces the default protocol in the assembled URL."""
    assert cli_utils.get_details_from_env() == ("https://localhost:8080", None)
|
||||
|
||||
|
||||
# clear=True isolates the test from any DATAHUB_GMS_* / GMS_* variables that
# happen to be exported in the environment running the test suite (an ambient
# DATAHUB_GMS_PORT or DATAHUB_GMS_TOKEN would otherwise change the result).
@mock.patch.dict(
    os.environ,
    {
        "DATAHUB_GMS_URL": "https://example.com",
    },
    clear=True,
)
def test_correct_url_when_url_set():
    """DATAHUB_GMS_URL alone is returned as the URL."""
    assert cli_utils.get_details_from_env() == ("https://example.com", None)
|
||||
|
||||
|
||||
# clear=True isolates the test from any DATAHUB_GMS_* / GMS_* variables that
# happen to be exported in the environment running the test suite.
@mock.patch.dict(
    os.environ,
    {
        "GMS_HOST": "https://example.com",
    },
    clear=True,
)
def test_correct_url_when_deprecated_host_env_set():
    """The deprecated GMS_HOST variable is still honoured as a URL."""
    assert cli_utils.get_details_from_env() == ("https://example.com", None)
|
||||
|
||||
@ -66,7 +66,7 @@ class TestPipeline(object):
|
||||
assert pipeline.config.sink.type == "datahub-rest"
|
||||
assert pipeline.config.sink.config == {
|
||||
"server": "http://localhost:8080",
|
||||
"token": "",
|
||||
"token": None,
|
||||
}
|
||||
|
||||
@freeze_time(FROZEN_TIME)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user