fix(ldap): properly handle escaped characters in LDAP DNs (#7928)

This commit is contained in:
Reilman79 2023-05-03 16:57:52 -04:00 committed by GitHub
parent e5dad3ab79
commit b6e2cc549a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 71 additions and 26 deletions

View File

@ -1,6 +1,5 @@
"""LDAP Source"""
import dataclasses
import re
from typing import Any, Dict, Iterable, List, Optional
import ldap
@ -415,8 +414,8 @@ class LDAPSource(StatefulIngestionSourceBase):
cn = attrs.get(self.config.group_attrs_map["urn"])
if cn:
full_name = cn[0].decode()
admins = parse_from_attrs(attrs, self.config.group_attrs_map["admins"])
members = parse_from_attrs(attrs, self.config.group_attrs_map["members"])
admins = parse_users(attrs, self.config.group_attrs_map["admins"])
members = parse_users(attrs, self.config.group_attrs_map["members"])
email = (
attrs[self.config.group_attrs_map["email"]][0].decode()
if self.config.group_attrs_map["email"] in attrs
@ -457,33 +456,33 @@ class LDAPSource(StatefulIngestionSourceBase):
super().close()
def parse_users(attrs: Dict[str, Any], filter_key: str) -> List[str]:
    """Convert the LDAP DNs stored under ``filter_key`` to Datahub corpuser strings.

    Args:
        attrs: Attribute mapping of an LDAP entry. Every value found under
            ``filter_key`` is handed to ``parse_ldap_dn``.
        filter_key: Name of the attribute that lists the users.

    Returns:
        One ``urn:li:corpuser:<name>`` string per listed value, or an empty
        list when ``filter_key`` is not present in ``attrs``.
    """
    if filter_key not in attrs:
        return []
    return [
        f"urn:li:corpuser:{parse_ldap_dn(ldap_user)}"
        for ldap_user in attrs[filter_key]
    ]


# Former name of ``parse_users``; kept as an alias so existing imports keep working.
parse_from_attrs = parse_users
def strip_ldap_info(input_clean: bytes) -> str:
    """Convert b'uid=username,ou=Groups,dc=internal,dc=machines' to ``username``.

    Only the text before the first comma is considered, so a DN whose first
    value contains an escaped comma is cut short at that comma.

    Args:
        input_clean: The raw DN bytes as returned by the LDAP server.

    Returns:
        The first comma-separated component with a leading ``uid=`` removed.
        Components that do not start with ``uid=`` are returned unchanged.
    """
    prefix = "uid="
    first_component = input_clean.decode().split(",")[0]
    # str.lstrip("uid=") would strip any leading run of the characters
    # 'u', 'i', 'd', '=' (turning "uid=david" into "avid"); remove the
    # literal prefix instead.
    if first_component.startswith(prefix):
        return first_component[len(prefix):]
    return first_component
def parse_groups(attrs: Dict[str, Any], filter_key: str) -> List[str]:
    """Convert the LDAP DNs stored under ``filter_key`` to Datahub corpgroup strings.

    Args:
        attrs: Attribute mapping of an LDAP entry. Every value found under
            ``filter_key`` is handed to ``parse_ldap_dn``.
        filter_key: Name of the attribute that lists the groups.

    Returns:
        One ``urn:li:corpGroup:<name>`` string per listed value, or an empty
        list when ``filter_key`` is not present in ``attrs``.
    """
    if filter_key not in attrs:
        return []
    return [
        f"urn:li:corpGroup:{parse_ldap_dn(ldap_group)}"
        for ldap_group in attrs[filter_key]
    ]
def strip_ldap_group_cn(input_clean: bytes) -> str:
    """Convert b'cn=group_name,ou=Groups,dc=internal,dc=machines' to ``group_name``.

    Only the text before the first comma is considered, so a DN whose first
    value contains an escaped comma is cut short at that comma.

    Args:
        input_clean: The raw DN bytes as returned by the LDAP server.

    Returns:
        The first comma-separated component with a leading ``cn=`` (matched
        case-insensitively) removed. Components that do not start with
        ``cn=`` are returned unchanged.
    """
    first_component = input_clean.decode().split(",")[0]
    # Anchor the pattern: an unanchored "cn=" would also delete occurrences
    # inside the value itself (e.g. "cn=acn=b" would become "ab").
    return re.sub(r"^cn=", "", first_component, flags=re.IGNORECASE)
def parse_ldap_dn(input_clean: bytes) -> str:
    """Return the value of the leading component of an LDAP DN.

    Converts an LDAP DN of format b'cn=group_name,ou=Groups,dc=internal,dc=machines'
    or b'uid=username,ou=Groups,dc=internal,dc=machines' to group name or username.
    Parsing is delegated to ``ldap.dn.str2dn``, so an escaped comma inside the
    value is part of the value rather than a separator.

    Inputs which are not valid LDAP DNs, or which parse to a DN without any
    component, are simply decoded and returned as strings.

    Args:
        input_clean: The raw DN bytes as returned by the LDAP server.

    Returns:
        The attribute value of the first component, or the decoded input when
        no such component can be extracted.
    """
    dn_flags = ldap.DN_FORMAT_LDAPV3
    # Validate with the same flags that are used for parsing, so the check and
    # the parse cannot disagree about what counts as a DN.
    if ldap.dn.is_dn(input_clean, flags=dn_flags):
        rdns = ldap.dn.str2dn(input_clean, flags=dn_flags)
        # NOTE(review): an empty DN appears to pass is_dn() while str2dn()
        # yields no components; guard the indexing so that case falls through
        # to the plain decode instead of raising IndexError.
        if rdns and rdns[0]:
            return rdns[0][0][1]
    return input_clean.decode()

View File

@ -1,13 +1,27 @@
import pytest
from datahub.ingestion.source.ldap import parse_from_attrs, strip_ldap_info
from datahub.ingestion.source.ldap import parse_groups, parse_ldap_dn, parse_users
def test_strip_ldap_info():
    """A user DN is reduced to the bare name found in its leading ``uid=`` part."""
    user_dn = b"uid=firstname.surname,ou=People,dc=internal,dc=machines"
    stripped = strip_ldap_info(user_dn)
    assert stripped == "firstname.surname"
# (raw DN bytes, expected leading value) pairs: a user DN, a group DN, and a
# group DN whose value contains escaped commas.
_LDAP_DN_CASES = [
    (
        b"uid=firstname.surname,ou=People,dc=internal,dc=machines",
        "firstname.surname",
    ),
    (
        b"cn=group_name,ou=Groups,dc=internal,dc=machines",
        "group_name",
    ),
    (
        b"cn=comma group (one\\, two\\, three),ou=Groups,dc=internal,dc=machines",
        "comma group (one, two, three)",
    ),
]


@pytest.mark.parametrize("input, expected", _LDAP_DN_CASES)
def test_parse_ldap_dn(input, expected):
    """``parse_ldap_dn`` returns the value of the first DN component."""
    actual = parse_ldap_dn(input)
    assert actual == expected
@pytest.mark.parametrize(
@ -32,11 +46,43 @@ def test_strip_ldap_info():
),
],
)
def test_parse_from_attrs(input, expected):
def test_parse_users(input, expected):
assert (
parse_from_attrs(
parse_users(
input,
"admins",
)
== expected
)
# (entry attributes, expected corpGroup URNs) pairs: one entry that lists two
# groups under "memberOf" and one entry that lacks the "memberOf" key.
_PARSE_GROUPS_CASES = [
    (
        {
            "memberOf": [
                b"cn=group1,ou=Groups,dc=internal,dc=machines",
                b"cn=group2,ou=Groups,dc=internal,dc=machines",
            ]
        },
        ["urn:li:corpGroup:group1", "urn:li:corpGroup:group2"],
    ),
    (
        {
            "not_member": [
                b"doesntmatter",
            ]
        },
        [],
    ),
]


@pytest.mark.parametrize("input, expected", _PARSE_GROUPS_CASES)
def test_parse_groups(input, expected):
    """``parse_groups`` maps the DNs under ``memberOf`` to corpGroup URNs."""
    group_urns = parse_groups(input, "memberOf")
    assert group_urns == expected