239 lines
10 KiB
Python
Raw Normal View History

2015-12-07 15:27:20 -08:00
#
# Copyright 2015 LinkedIn Corp. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
from javax.naming.directory import InitialDirContext
from javax.naming import Context
from javax.naming.directory import SearchControls
from javax.naming.directory import BasicAttributes
from wherehows.common import Constant
import csv, re, os, sys
from java.util import Hashtable
from java.io import FileWriter
class LdapExtract:
    """
    Pull user and group records from LDAP and dump them into delimited files.

    The job settings are read from the ``args`` mapping using the ``Constant.*``
    keys. Three files are produced by :meth:`run` under
    ``<app_folder>/<app_id>``: user records, raw group membership records and
    flattened (nested groups expanded) group membership records.
    """

    # Single-character field separator shared by every record file written here.
    _RECORD_DELIMITER = '\x1a'

    def __init__(self, args):
        """
        Read the numeric ids / folders from the job settings and make sure the
        output folder exists
        :param args: mapping of job settings, indexed with the Constant.* keys
        """
        self.args = args
        self.app_id = int(args[Constant.APP_ID_KEY])
        self.group_app_id = int(args[Constant.LDAP_GROUP_APP_ID_KEY])
        self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
        self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
        self.metadata_folder = self.app_folder + "/" + str(self.app_id)

        if not os.path.exists(self.metadata_folder):
            try:
                os.makedirs(self.metadata_folder)
            except Exception as e:
                # Best effort: report the problem and carry on; a missing folder
                # will surface later when the record files are opened.
                print(e)

        # user ids seen by fetch_ldap_user
        self.ldap_user = set()
        # group id -> LDAP attribute object holding the direct member ids
        self.group_map = dict()
        # group id -> ordered list of user ids after expanding nested groups
        self.group_flatten_map = dict()

    def split_property(self, property_value):
        """
        Split a property written as 'a','b','c' into its quoted items
        :param property_value: raw property string
        :return: list of items with the surrounding quotes / whitespace removed
        """
        trimmed = property_value.strip('\' \t\n\r\f\v')
        return re.split(r"\s*'\s*,\s*'\s*", trimmed)

    def _clean(self, value):
        """Remove CR/LF characters, trim surrounding whitespace and encode as UTF-8."""
        return re.sub(r"\r|\n", '', value).strip().encode('utf8')

    def _open_context(self, factory_key, url_key, principal_key, credentials_key):
        """Open an LDAP directory context from the four settings named by the given keys."""
        settings = Hashtable()
        settings.put(Context.INITIAL_CONTEXT_FACTORY, self.args[factory_key])
        settings.put(Context.PROVIDER_URL, self.args[url_key])
        settings.put(Context.SECURITY_PRINCIPAL, self.args[principal_key])
        settings.put(Context.SECURITY_CREDENTIALS, self.args[credentials_key])
        return InitialDirContext(settings)

    def _write_records(self, file, records):
        """Write the rows to ``file`` and close it so that everything is flushed to disk."""
        with open(file, "w") as out:
            writer = csv.writer(out, delimiter=self._RECORD_DELIMITER,
                                quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
            writer.writerows(records)

    def fetch_ldap_user(self, file):
        """
        fetch ldap user from ldap server

        Each output row holds: app id, active flag ('N' when the record comes
        from the configured inactive domain, otherwise 'Y'), one column per
        configured return attribute (empty string when the attribute is absent)
        and the execution id. Every user id found is remembered in
        self.ldap_user.
        :param file: output file name
        """
        # Query target
        search_target = '(objectClass=person)'
        return_attributes_standard = ['user_id', 'distinct_name', 'name', 'display_name', 'title',
                                      'employee_number', 'manager', 'mail', 'department_number',
                                      'department', 'start_date', 'mobile']
        return_attributes_actual = self.split_property(self.args[Constant.LDAP_SEARCH_RETURN_ATTRS_KEY])
        # standard column name -> attribute name configured for this server (matched by position)
        return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))

        ctls = SearchControls()
        ctls.setReturningAttributes(return_attributes_actual)
        ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)

        ldap_records = []
        # domain format should look like : 'OU=domain1','OU=domain2','OU=domain3,OU=subdomain3'
        org_units = self.split_property(self.args[Constant.LDAP_SEARCH_DOMAINS_KEY])

        # Connect to LDAP Server
        ctx = self._open_context(Constant.LDAP_CONTEXT_FACTORY_KEY,
                                 Constant.LDAP_CONTEXT_PROVIDER_URL_KEY,
                                 Constant.LDAP_CONTEXT_SECURITY_PRINCIPAL_KEY,
                                 Constant.LDAP_CONTEXT_SECURITY_CREDENTIALS_KEY)
        try:
            for search_unit in org_units:
                search_result = ctx.search(search_unit, search_target, ctls)
                for person in search_result:
                    ldap_user_tuple = [self.app_id]
                    if search_unit == self.args[Constant.LDAP_INACTIVE_DOMAIN_KEY]:
                        ldap_user_tuple.append('N')
                    else:
                        ldap_user_tuple.append('Y')

                    person_attributes = person.getAttributes()
                    user_id = person_attributes.get(return_attributes_map['user_id'])
                    self.ldap_user.add(self._clean(user_id.get(0)))

                    for attr_name in return_attributes_actual:
                        attr = person_attributes.get(attr_name)
                        if attr:
                            attr = self._clean(attr.get(0))
                            # special fix for start_date: a 4 character value gets '0101' appended
                            if attr_name == return_attributes_map['start_date'] and len(attr) == 4:
                                attr += '0101'
                            ldap_user_tuple.append(attr)
                        else:
                            ldap_user_tuple.append("")

                    ldap_user_tuple.append(self.wh_exec_id)
                    ldap_records.append(ldap_user_tuple)
        finally:
            # release the server connection even when a search fails
            ctx.close()

        print("%d records found in ldap search" % (len(self.ldap_user)))
        self._write_records(file, ldap_records)

    def fetch_ldap_group(self, file):
        """
        fetch group mapping from group ldap server

        Each output row holds: group app id, group id, position of the member
        inside the group, app id of the member (self.app_id when the member is
        a known user, otherwise self.group_app_id), member id and the execution
        id. Groups that have members are remembered in self.group_map.
        :param file: output file name
        """
        search_target = "(objectClass=posixGroup)"
        return_attributes_standard = ['group_id', 'member_ids']
        return_attributes_actual = self.split_property(self.args[Constant.LDAP_GROUP_SEARCH_RETURN_ATTRS_KEY])
        # standard column name -> attribute name configured for this server (matched by position)
        return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))

        ctls = SearchControls()
        ctls.setReturningAttributes(return_attributes_actual)
        ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)

        ldap_records = []
        org_units = self.split_property(self.args[Constant.LDAP_GROUP_SEARCH_DOMAINS_KEY])

        ctx = self._open_context(Constant.LDAP_GROUP_CONTEXT_FACTORY_KEY,
                                 Constant.LDAP_GROUP_CONTEXT_PROVIDER_URL_KEY,
                                 Constant.LDAP_GROUP_CONTEXT_SECURITY_PRINCIPAL_KEY,
                                 Constant.LDAP_GROUP_CONTEXT_SECURITY_CREDENTIALS_KEY)
        try:
            for search_unit in org_units:
                results = ctx.search(search_unit, search_target, ctls)
                for r in results:
                    group_attributes = r.getAttributes()
                    group = self._clean(group_attributes.get(return_attributes_map['group_id']).get(0))
                    # skip special group that contains all group users
                    if group == 'users':
                        continue

                    members = group_attributes.get(return_attributes_map['member_ids'])
                    if not members:
                        continue

                    self.group_map[group] = members
                    for sort_id, member in enumerate(members.getAll()):
                        member = self._clean(member)
                        # a member that is not a known user is recorded under the group app id
                        if member in self.ldap_user:
                            member_app_id = self.app_id
                        else:
                            member_app_id = self.group_app_id
                        ldap_records.append([self.group_app_id, group, sort_id,
                                             member_app_id, member, self.wh_exec_id])
        finally:
            # release the server connection even when a search fails
            ctx.close()

        print("%d records found in group accounts" % (len(self.group_map)))
        self._write_records(file, ldap_records)

    def fetch_ldap_group_flatten(self, file):
        """
        Flatten the group - user map by recursive extending inner-group members

        Each output row holds: group app id, group id, position of the user in
        the flattened list, app id, user id and the execution id. The flattened
        lists are remembered in self.group_flatten_map.
        :param file: output file name
        """
        ldap_records = []
        for group in self.group_map:
            all_users = self.get_all_users_for_group(group, self.ldap_user, self.group_map, set())
            self.group_flatten_map[group] = all_users
            for sort_id, u in enumerate(all_users):
                ldap_records.append([self.group_app_id, group, sort_id,
                                     self.app_id, u, self.wh_exec_id])

        # same separator as the user / group files (csv rejects an empty delimiter)
        self._write_records(file, ldap_records)

    def get_all_users_for_group(self, current, user_set, group_map, previous):
        """
        Recursive method that calculate all users for current group
        :param current: current group name
        :param user_set: the user set that contains all user ids
        :param group_map: the original group user map before extend
        :param previous: previous visited group name (updated in place)
        :return: ordered list of users
        """
        # base condition: current is a user itself
        if current in user_set:
            return [current]

        # cyclic condition: this group is already being expanded
        if current in previous:
            return []

        # avoid duplicate computation
        if current in self.group_flatten_map:
            return self.group_flatten_map[current]

        ret = []
        # current is a group
        if current in group_map:
            members = group_map[current]
            previous.add(current)
            # ids already placed in ret; keeps first-seen order without rescanning the list
            seen = set()
            for member in members.getAll():
                member = self._clean(member)
                for i in self.get_all_users_for_group(member, user_set, group_map, previous):
                    if i not in seen:
                        seen.add(i)
                        ret.append(i)
        return ret

    def run(self):
        """Produce the user, group and flattened group files inside the metadata folder."""
        self.fetch_ldap_user(self.metadata_folder + "/ldap_user_record.csv")
        self.fetch_ldap_group(self.metadata_folder + "/ldap_group_record.csv")
        self.fetch_ldap_group_flatten(self.metadata_folder + "/ldap_group_flatten_record.csv")
if __name__ == "__main__":
    # The first argument is handed to LdapExtract as the job settings mapping.
    extractor = LdapExtract(sys.argv[1])
    extractor.run()