mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-12 09:25:23 +00:00
feat(ingestion): add groups to ldap users (#5470)
This commit is contained in:
parent
2163d75c83
commit
c182fe7fca
@ -1,5 +1,6 @@
|
|||||||
"""LDAP Source"""
|
"""LDAP Source"""
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
import re
|
||||||
from typing import Any, Dict, Iterable, List, Optional
|
from typing import Any, Dict, Iterable, List, Optional
|
||||||
|
|
||||||
import ldap
|
import ldap
|
||||||
@ -22,6 +23,7 @@ from datahub.metadata.schema_classes import (
|
|||||||
CorpGroupSnapshotClass,
|
CorpGroupSnapshotClass,
|
||||||
CorpUserInfoClass,
|
CorpUserInfoClass,
|
||||||
CorpUserSnapshotClass,
|
CorpUserSnapshotClass,
|
||||||
|
GroupMembershipClass,
|
||||||
)
|
)
|
||||||
|
|
||||||
# default mapping for attrs
|
# default mapping for attrs
|
||||||
@ -42,6 +44,7 @@ user_attrs_map["departmentId"] = "departmentNumber"
|
|||||||
user_attrs_map["title"] = "title"
|
user_attrs_map["title"] = "title"
|
||||||
user_attrs_map["departmentName"] = "departmentNumber"
|
user_attrs_map["departmentName"] = "departmentNumber"
|
||||||
user_attrs_map["countryCode"] = "countryCode"
|
user_attrs_map["countryCode"] = "countryCode"
|
||||||
|
user_attrs_map["memberOf"] = "memberOf"
|
||||||
|
|
||||||
# group related attrs
|
# group related attrs
|
||||||
group_attrs_map["urn"] = "cn"
|
group_attrs_map["urn"] = "cn"
|
||||||
@ -94,6 +97,7 @@ class LDAPSourceConfig(ConfigModel):
|
|||||||
# Extraction configuration.
|
# Extraction configuration.
|
||||||
base_dn: str = Field(description="LDAP DN.")
|
base_dn: str = Field(description="LDAP DN.")
|
||||||
filter: str = Field(default="(objectClass=*)", description="LDAP extractor filter.")
|
filter: str = Field(default="(objectClass=*)", description="LDAP extractor filter.")
|
||||||
|
attrs_list: List[str] = Field(default=None, description="Retrieved attributes list")
|
||||||
|
|
||||||
# If set to true, any users without first and last names will be dropped.
|
# If set to true, any users without first and last names will be dropped.
|
||||||
drop_missing_first_last_name: bool = Field(
|
drop_missing_first_last_name: bool = Field(
|
||||||
@ -201,6 +205,7 @@ class LDAPSource(Source):
|
|||||||
self.config.base_dn,
|
self.config.base_dn,
|
||||||
ldap.SCOPE_SUBTREE,
|
ldap.SCOPE_SUBTREE,
|
||||||
self.config.filter,
|
self.config.filter,
|
||||||
|
self.config.attrs_list,
|
||||||
serverctrls=[self.lc],
|
serverctrls=[self.lc],
|
||||||
)
|
)
|
||||||
_rtype, rdata, _rmsgid, serverctrls = self.ldap_client.result3(msgid)
|
_rtype, rdata, _rmsgid, serverctrls = self.ldap_client.result3(msgid)
|
||||||
@ -228,6 +233,7 @@ class LDAPSource(Source):
|
|||||||
elif (
|
elif (
|
||||||
b"posixGroup" in attrs["objectClass"]
|
b"posixGroup" in attrs["objectClass"]
|
||||||
or b"organizationalUnit" in attrs["objectClass"]
|
or b"organizationalUnit" in attrs["objectClass"]
|
||||||
|
or b"groupOfNames" in attrs["objectClass"]
|
||||||
or b"group" in attrs["objectClass"]
|
or b"group" in attrs["objectClass"]
|
||||||
):
|
):
|
||||||
yield from self.handle_group(dn, attrs)
|
yield from self.handle_group(dn, attrs)
|
||||||
@ -301,6 +307,7 @@ class LDAPSource(Source):
|
|||||||
full_name = attrs[self.config.user_attrs_map["fullName"]][0].decode()
|
full_name = attrs[self.config.user_attrs_map["fullName"]][0].decode()
|
||||||
first_name = attrs[self.config.user_attrs_map["firstName"]][0].decode()
|
first_name = attrs[self.config.user_attrs_map["firstName"]][0].decode()
|
||||||
last_name = attrs[self.config.user_attrs_map["lastName"]][0].decode()
|
last_name = attrs[self.config.user_attrs_map["lastName"]][0].decode()
|
||||||
|
groups = parse_groups(attrs, self.config.user_attrs_map["memberOf"])
|
||||||
|
|
||||||
email = (
|
email = (
|
||||||
(attrs[self.config.user_attrs_map["email"]][0]).decode()
|
(attrs[self.config.user_attrs_map["email"]][0]).decode()
|
||||||
@ -334,27 +341,30 @@ class LDAPSource(Source):
|
|||||||
)
|
)
|
||||||
manager_urn = f"urn:li:corpuser:{manager_ldap}" if manager_ldap else None
|
manager_urn = f"urn:li:corpuser:{manager_ldap}" if manager_ldap else None
|
||||||
|
|
||||||
return MetadataChangeEvent(
|
user_snapshot = CorpUserSnapshotClass(
|
||||||
proposedSnapshot=CorpUserSnapshotClass(
|
urn=f"urn:li:corpuser:{ldap_user}",
|
||||||
urn=f"urn:li:corpuser:{ldap_user}",
|
aspects=[
|
||||||
aspects=[
|
CorpUserInfoClass(
|
||||||
CorpUserInfoClass(
|
active=True,
|
||||||
active=True,
|
email=email,
|
||||||
email=email,
|
fullName=full_name,
|
||||||
fullName=full_name,
|
firstName=first_name,
|
||||||
firstName=first_name,
|
lastName=last_name,
|
||||||
lastName=last_name,
|
departmentId=department_id,
|
||||||
departmentId=department_id,
|
departmentName=department_name,
|
||||||
departmentName=department_name,
|
displayName=display_name,
|
||||||
displayName=display_name,
|
countryCode=country_code,
|
||||||
countryCode=country_code,
|
title=title,
|
||||||
title=title,
|
managerUrn=manager_urn,
|
||||||
managerUrn=manager_urn,
|
)
|
||||||
)
|
],
|
||||||
],
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if groups:
|
||||||
|
user_snapshot.aspects.append(GroupMembershipClass(groups=groups))
|
||||||
|
|
||||||
|
return MetadataChangeEvent(proposedSnapshot=user_snapshot)
|
||||||
|
|
||||||
def build_corp_group_mce(self, attrs: dict) -> Optional[MetadataChangeEvent]:
|
def build_corp_group_mce(self, attrs: dict) -> Optional[MetadataChangeEvent]:
|
||||||
"""Creates a MetadataChangeEvent for LDAP groups."""
|
"""Creates a MetadataChangeEvent for LDAP groups."""
|
||||||
cn = attrs.get(self.config.group_attrs_map["urn"])
|
cn = attrs.get(self.config.group_attrs_map["urn"])
|
||||||
@ -417,3 +427,19 @@ def strip_ldap_info(input_clean: bytes) -> str:
|
|||||||
"""Converts a b'uid=username,ou=Groups,dc=internal,dc=machines'
|
"""Converts a b'uid=username,ou=Groups,dc=internal,dc=machines'
|
||||||
format to username"""
|
format to username"""
|
||||||
return input_clean.decode().split(",")[0].lstrip("uid=")
|
return input_clean.decode().split(",")[0].lstrip("uid=")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_groups(attrs: Dict[str, Any], filter_key: str) -> List[str]:
|
||||||
|
"""Converts a list of LDAP groups to Datahub corpgroup strings"""
|
||||||
|
if filter_key in attrs:
|
||||||
|
return [
|
||||||
|
f"urn:li:corpGroup:{strip_ldap_group_cn(ldap_group)}"
|
||||||
|
for ldap_group in attrs[filter_key]
|
||||||
|
]
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def strip_ldap_group_cn(input_clean: bytes) -> str:
|
||||||
|
"""Converts a b'cn=group_name,ou=Groups,dc=internal,dc=machines'
|
||||||
|
format to group name"""
|
||||||
|
return re.sub("cn=", "", input_clean.decode().split(",")[0], flags=re.IGNORECASE)
|
||||||
|
@ -151,5 +151,93 @@
|
|||||||
"runId": "ldap-test",
|
"runId": "ldap-test",
|
||||||
"properties": null
|
"properties": null
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
|
||||||
|
"urn": "urn:li:corpuser:hbevan",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
|
||||||
|
"active": true,
|
||||||
|
"displayName": "Hester Bevan",
|
||||||
|
"email": "hbevan",
|
||||||
|
"title": null,
|
||||||
|
"managerUrn": null,
|
||||||
|
"departmentId": null,
|
||||||
|
"departmentName": null,
|
||||||
|
"firstName": "Hester",
|
||||||
|
"lastName": "Bevan",
|
||||||
|
"fullName": "Hester Bevan",
|
||||||
|
"countryCode": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1615443388097,
|
||||||
|
"runId": "ldap-test",
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
|
||||||
|
"urn": "urn:li:corpuser:ehaas",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
|
||||||
|
"active": true,
|
||||||
|
"displayName": "Evalyn Haas",
|
||||||
|
"email": "ehaas",
|
||||||
|
"title": null,
|
||||||
|
"managerUrn": null,
|
||||||
|
"departmentId": null,
|
||||||
|
"departmentName": null,
|
||||||
|
"firstName": "Evalyn",
|
||||||
|
"lastName": "Haas",
|
||||||
|
"fullName": "Evalyn Haas",
|
||||||
|
"countryCode": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1615443388097,
|
||||||
|
"runId": "ldap-test",
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot": {
|
||||||
|
"urn": "urn:li:corpGroup:HR Department",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpGroupInfo": {
|
||||||
|
"displayName": null,
|
||||||
|
"email": "HR Department",
|
||||||
|
"admins": [],
|
||||||
|
"members": [],
|
||||||
|
"groups": [],
|
||||||
|
"description": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1615443388097,
|
||||||
|
"runId": "ldap-test",
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
@ -0,0 +1,82 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
|
||||||
|
"urn": "urn:li:corpuser:hbevan",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
|
||||||
|
"active": true,
|
||||||
|
"displayName": "Hester Bevan",
|
||||||
|
"email": "hbevan",
|
||||||
|
"title": null,
|
||||||
|
"managerUrn": null,
|
||||||
|
"departmentId": null,
|
||||||
|
"departmentName": null,
|
||||||
|
"firstName": "Hester",
|
||||||
|
"lastName": "Bevan",
|
||||||
|
"fullName": "Hester Bevan",
|
||||||
|
"countryCode": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.GroupMembership": {
|
||||||
|
"groups": [
|
||||||
|
"urn:li:corpGroup:HR Department"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1615443388097,
|
||||||
|
"runId": "ldap-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"auditHeader": null,
|
||||||
|
"proposedSnapshot": {
|
||||||
|
"com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
|
||||||
|
"urn": "urn:li:corpuser:ehaas",
|
||||||
|
"aspects": [
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.CorpUserInfo": {
|
||||||
|
"active": true,
|
||||||
|
"displayName": "Evalyn Haas",
|
||||||
|
"email": "ehaas",
|
||||||
|
"title": null,
|
||||||
|
"managerUrn": null,
|
||||||
|
"departmentId": null,
|
||||||
|
"departmentName": null,
|
||||||
|
"firstName": "Evalyn",
|
||||||
|
"lastName": "Haas",
|
||||||
|
"fullName": "Evalyn Haas",
|
||||||
|
"countryCode": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"com.linkedin.pegasus2avro.identity.GroupMembership": {
|
||||||
|
"groups": [
|
||||||
|
"urn:li:corpGroup:HR Department"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"proposedDelta": null,
|
||||||
|
"systemMetadata": {
|
||||||
|
"lastObserved": 1615443388097,
|
||||||
|
"runId": "ldap-test",
|
||||||
|
"registryName": null,
|
||||||
|
"registryVersion": null,
|
||||||
|
"properties": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
@ -9,6 +9,19 @@
|
|||||||
|
|
||||||
version: 1
|
version: 1
|
||||||
|
|
||||||
|
# Enable memberOf overlay
|
||||||
|
dn: olcOverlay=memberof,olcDatabase={1}mdb,cn=config
|
||||||
|
objectClass: olcMemberOf
|
||||||
|
objectClass: olcOverlayConfig
|
||||||
|
objectClass: olcConfig
|
||||||
|
objectClass: top
|
||||||
|
olcOverlay: memberof
|
||||||
|
olcMemberOfDangling: ignore
|
||||||
|
olcMemberOfRefInt: TRUE
|
||||||
|
olcMemberOfGroupOC: groupOfNames
|
||||||
|
olcMemberOfMemberAD: member
|
||||||
|
olcMemberOfMemberOfAD: memberOf
|
||||||
|
|
||||||
# Entry 1: dc=example,dc=org
|
# Entry 1: dc=example,dc=org
|
||||||
# Note: this is commented out because the containers bootstrap scripts already
|
# Note: this is commented out because the containers bootstrap scripts already
|
||||||
# handle this for us.
|
# handle this for us.
|
||||||
@ -113,3 +126,39 @@ dn: ou=Sales Department,dc=example,dc=org
|
|||||||
objectclass: organizationalUnit
|
objectclass: organizationalUnit
|
||||||
objectclass: top
|
objectclass: top
|
||||||
ou: Sales Department
|
ou: Sales Department
|
||||||
|
|
||||||
|
# Entry 10: cn=Hester Bevan,ou=people,dc=example,dc=org
|
||||||
|
dn: cn=Hester Bevan,ou=people,dc=example,dc=org
|
||||||
|
cn: Hester Bevan
|
||||||
|
gidnumber: 500
|
||||||
|
givenname: Hester
|
||||||
|
homedirectory: /home/users/hbevan
|
||||||
|
objectclass: inetOrgPerson
|
||||||
|
objectclass: posixAccount
|
||||||
|
objectclass: top
|
||||||
|
sn: Bevan
|
||||||
|
uid: hbevan
|
||||||
|
uidnumber: 1004
|
||||||
|
userpassword: {MD5}4QrcOUm6Wau+VuBX8g+IPg==
|
||||||
|
|
||||||
|
# Entry 11: cn=Evalyn Haas,ou=people,dc=example,dc=org
|
||||||
|
dn: cn=Evalyn Haas,ou=people,dc=example,dc=org
|
||||||
|
cn: Evalyn Haas
|
||||||
|
gidnumber: 500
|
||||||
|
givenname: Evalyn
|
||||||
|
homedirectory: /home/users/ehaas
|
||||||
|
objectclass: inetOrgPerson
|
||||||
|
objectclass: posixAccount
|
||||||
|
objectclass: top
|
||||||
|
sn: Haas
|
||||||
|
uid: ehaas
|
||||||
|
uidnumber: 1005
|
||||||
|
userpassword: {MD5}4QrcOUm6Wau+VuBX8g+IPg==
|
||||||
|
|
||||||
|
# Entry 12: cn=HR Department,ou=groups,dc=example,dc=org
|
||||||
|
dn: cn=HR Department,dc=example,dc=org
|
||||||
|
cn: HR Department
|
||||||
|
objectclass: groupOfNames
|
||||||
|
objectclass: top
|
||||||
|
member: cn=Hester Bevan,ou=people,dc=example,dc=org
|
||||||
|
member: cn=Evalyn Haas,ou=people,dc=example,dc=org
|
@ -50,3 +50,50 @@ def test_ldap_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
|
|||||||
output_path=tmp_path / "ldap_mces.json",
|
output_path=tmp_path / "ldap_mces.json",
|
||||||
golden_path=test_resources_dir / "ldap_mces_golden.json",
|
golden_path=test_resources_dir / "ldap_mces_golden.json",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
|
||||||
|
def test_ldap_memberof_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
|
||||||
|
test_resources_dir = pytestconfig.rootpath / "tests/integration/ldap"
|
||||||
|
|
||||||
|
with docker_compose_runner(
|
||||||
|
test_resources_dir / "docker-compose.yml", "ldap"
|
||||||
|
) as docker_services:
|
||||||
|
# The openldap container loads the sample data after exposing the port publicly. As such,
|
||||||
|
# we must wait a little bit extra to ensure that the sample data is loaded.
|
||||||
|
wait_for_port(docker_services, "openldap", 389)
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
|
pipeline = Pipeline.create(
|
||||||
|
{
|
||||||
|
"run_id": "ldap-test",
|
||||||
|
"source": {
|
||||||
|
"type": "ldap",
|
||||||
|
"config": {
|
||||||
|
"ldap_server": "ldap://localhost",
|
||||||
|
"ldap_user": "cn=admin,dc=example,dc=org",
|
||||||
|
"ldap_password": "admin",
|
||||||
|
"base_dn": "dc=example,dc=org",
|
||||||
|
"filter": "(memberOf=cn=HR Department,dc=example,dc=org)",
|
||||||
|
"attrs_list": ["+", "*"],
|
||||||
|
"group_attrs_map": {
|
||||||
|
"members": "member",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"sink": {
|
||||||
|
"type": "file",
|
||||||
|
"config": {
|
||||||
|
"filename": f"{tmp_path}/ldap_memberof_mces.json",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
pipeline.run()
|
||||||
|
pipeline.raise_from_status()
|
||||||
|
|
||||||
|
mce_helpers.check_golden_file(
|
||||||
|
pytestconfig,
|
||||||
|
output_path=tmp_path / "ldap_memberof_mces.json",
|
||||||
|
golden_path=test_resources_dir / "ldap_memberof_mces_golden.json",
|
||||||
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user