mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-28 02:17:53 +00:00
fix(ldap): properly handle escaped characters in LDAP DNs (#7928)
This commit is contained in:
parent
e5dad3ab79
commit
b6e2cc549a
@ -1,6 +1,5 @@
|
||||
"""LDAP Source"""
|
||||
import dataclasses
|
||||
import re
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
import ldap
|
||||
@ -415,8 +414,8 @@ class LDAPSource(StatefulIngestionSourceBase):
|
||||
cn = attrs.get(self.config.group_attrs_map["urn"])
|
||||
if cn:
|
||||
full_name = cn[0].decode()
|
||||
admins = parse_from_attrs(attrs, self.config.group_attrs_map["admins"])
|
||||
members = parse_from_attrs(attrs, self.config.group_attrs_map["members"])
|
||||
admins = parse_users(attrs, self.config.group_attrs_map["admins"])
|
||||
members = parse_users(attrs, self.config.group_attrs_map["members"])
|
||||
email = (
|
||||
attrs[self.config.group_attrs_map["email"]][0].decode()
|
||||
if self.config.group_attrs_map["email"] in attrs
|
||||
@ -457,33 +456,33 @@ class LDAPSource(StatefulIngestionSourceBase):
|
||||
super().close()
|
||||
|
||||
|
||||
def parse_from_attrs(attrs: Dict[str, Any], filter_key: str) -> List[str]:
|
||||
"""Converts a list of LDAP formats to Datahub corpuser strings."""
|
||||
def parse_users(attrs: Dict[str, Any], filter_key: str) -> List[str]:
|
||||
"""Converts a list of LDAP DNs to Datahub corpuser strings."""
|
||||
if filter_key in attrs:
|
||||
return [
|
||||
f"urn:li:corpuser:{strip_ldap_info(ldap_user)}"
|
||||
f"urn:li:corpuser:{parse_ldap_dn(ldap_user)}"
|
||||
for ldap_user in attrs[filter_key]
|
||||
]
|
||||
return []
|
||||
|
||||
|
||||
def strip_ldap_info(input_clean: bytes) -> str:
    """Extract the username from a DN-style value.

    Converts a b'uid=username,ou=Groups,dc=internal,dc=machines'
    format to ``username``.

    Args:
        input_clean: Raw attribute value as returned by the LDAP server.

    Returns:
        The text of the first comma-separated component with a leading
        ``uid=`` removed. Components that do not start with ``uid=`` are
        returned unchanged.
    """
    prefix = "uid="
    first_component = input_clean.decode().split(",")[0]
    # NOTE: str.lstrip("uid=") would strip any leading run of the characters
    # 'u', 'i', 'd' and '=' (so "uid=david" became "avid"); remove the exact
    # prefix instead.
    if first_component.startswith(prefix):
        return first_component[len(prefix):]
    return first_component
|
||||
|
||||
|
||||
def parse_groups(attrs: Dict[str, Any], filter_key: str) -> List[str]:
|
||||
"""Converts a list of LDAP groups to Datahub corpgroup strings"""
|
||||
"""Converts a list of LDAP DNs to Datahub corpgroup strings"""
|
||||
if filter_key in attrs:
|
||||
return [
|
||||
f"urn:li:corpGroup:{strip_ldap_group_cn(ldap_group)}"
|
||||
f"urn:li:corpGroup:{parse_ldap_dn(ldap_group)}"
|
||||
for ldap_group in attrs[filter_key]
|
||||
]
|
||||
return []
|
||||
|
||||
|
||||
def strip_ldap_group_cn(input_clean: bytes) -> str:
    """Extract the group name from a DN-style value.

    Converts a b'cn=group_name,ou=Groups,dc=internal,dc=machines'
    format to ``group_name``.

    Args:
        input_clean: Raw attribute value as returned by the LDAP server.

    Returns:
        The text of the first comma-separated component with a leading
        ``cn=`` (matched case-insensitively) removed.
    """
    first_component = input_clean.decode().split(",")[0]
    # Anchor the pattern and replace at most once: an unanchored substitution
    # would also delete any "cn=" that occurs inside the group name itself.
    # NOTE: the plain split(",") above does not understand escaped commas
    # ("\,") in the name; such names are still truncated at the first comma.
    return re.sub(r"^cn=", "", first_component, count=1, flags=re.IGNORECASE)
|
||||
def parse_ldap_dn(input_clean: bytes) -> str:
    """
    Converts an LDAP DN of format b'cn=group_name,ou=Groups,dc=internal,dc=machines'
    or b'uid=username,ou=Groups,dc=internal,dc=machines' to group name or username.

    Only the value of the first attribute in the first RDN is returned. Parsing
    is delegated to python-ldap, so escaped characters in that value (for
    example ``\\,``) come back unescaped.

    Inputs which are not valid LDAP DNs, or which parse to no components at all
    (such as an empty value), are simply decoded and returned as strings.
    """
    if ldap.dn.is_dn(input_clean):
        rdns = ldap.dn.str2dn(input_clean, flags=ldap.DN_FORMAT_LDAPV3)
        # python-ldap's str2dn() returns an empty list for an empty DN, which
        # is_dn() still accepts; guard the indexing so that case falls through
        # to the plain decode below instead of raising IndexError.
        if rdns and rdns[0]:
            return rdns[0][0][1]
    return input_clean.decode()
|
||||
|
||||
@ -1,13 +1,27 @@
|
||||
import pytest
|
||||
|
||||
from datahub.ingestion.source.ldap import parse_from_attrs, strip_ldap_info
|
||||
from datahub.ingestion.source.ldap import parse_groups, parse_ldap_dn, parse_users
|
||||
|
||||
|
||||
def test_strip_ldap_info():
    """A uid-prefixed DN is reduced to the bare username."""
    user_dn = b"uid=firstname.surname,ou=People,dc=internal,dc=machines"
    username = strip_ldap_info(user_dn)
    assert username == "firstname.surname"
|
||||
@pytest.mark.parametrize(
    "input, expected",
    [
        # User DN: the uid value is extracted.
        (
            b"uid=firstname.surname,ou=People,dc=internal,dc=machines",
            "firstname.surname",
        ),
        # Group DN: the cn value is extracted.
        (
            b"cn=group_name,ou=Groups,dc=internal,dc=machines",
            "group_name",
        ),
        # Escaped commas belong to the name and are returned unescaped.
        (
            b"cn=comma group (one\\, two\\, three),ou=Groups,dc=internal,dc=machines",
            "comma group (one, two, three)",
        ),
    ],
)
def test_parse_ldap_dn(input, expected):
    """parse_ldap_dn returns the value of the leading RDN of each DN."""
    parsed = parse_ldap_dn(input)
    assert parsed == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
@ -32,11 +46,43 @@ def test_strip_ldap_info():
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_parse_from_attrs(input, expected):
|
||||
def test_parse_users(input, expected):
|
||||
assert (
|
||||
parse_from_attrs(
|
||||
parse_users(
|
||||
input,
|
||||
"admins",
|
||||
)
|
||||
== expected
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "input, expected",
    [
        # Every DN listed under the requested key becomes a corpGroup urn.
        (
            {
                "memberOf": [
                    b"cn=group1,ou=Groups,dc=internal,dc=machines",
                    b"cn=group2,ou=Groups,dc=internal,dc=machines",
                ]
            },
            ["urn:li:corpGroup:group1", "urn:li:corpGroup:group2"],
        ),
        # The requested key is absent, so no urns are produced.
        (
            {
                "not_member": [
                    b"doesntmatter",
                ]
            },
            [],
        ),
    ],
)
def test_parse_groups(input, expected):
    """parse_groups maps the 'memberOf' DNs to corpGroup urns."""
    group_urns = parse_groups(input, "memberOf")
    assert group_urns == expected
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user