fix(ingest): loosen Kafka broker validation (#2172)

This commit is contained in:
Harshal Sheth 2021-03-04 23:17:36 -08:00 committed by GitHub
parent e575add1fb
commit ae9ffde8d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,3 +1,5 @@
import re
from pydantic import validator
from datahub.configuration.common import ConfigModel
@ -16,12 +18,23 @@ class _KafkaConnectionConfig(ConfigModel):
schema_registry_config: dict = {}
@validator("bootstrap")
def bootstrap_host_colon_port_comma(cls, val: str) -> str:
    """Validate a comma-separated list of Kafka bootstrap servers.

    Each comma-separated entry must be either ``host`` or ``host:port``.
    The host may contain only word characters, ``-``, ``.`` and ``:``;
    the port, when given, must consist solely of digits.

    Args:
        val: The raw ``bootstrap`` setting, e.g. ``"broker1:9092,broker2"``.

    Returns:
        ``val`` unchanged. The value must be returned explicitly: the
        result of a validator becomes the field's value, so falling off
        the end of the function would replace the setting with ``None``.

    Raises:
        AssertionError: If any entry has a host with disallowed characters
            (including an empty host) or a port that is not all digits.
    """
    for entry in val.split(","):
        # The port can be provided but is not required.
        port = None
        if ":" in entry:
            # Split on the LAST colon only, so any earlier colons stay
            # part of the host and are checked by the host pattern below.
            (host, port) = entry.rsplit(":", 1)
        else:
            host = entry

        assert re.match(
            # This regex is quite loose. Many invalid hostnames or IPs will slip through,
            # but it serves as a good first line of validation. We defer to Kafka for the
            # remaining validation.
            r"^[\w\-\.\:]+$",
            host,
        ), f"host contains bad characters, found {host}"

        if port is not None:
            assert port.isdigit(), f"port must be all digits, found {port}"

    return val
class KafkaConsumerConnectionConfig(_KafkaConnectionConfig):