#
# Copyright 2015 LinkedIn Corp. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#

from org.slf4j import LoggerFactory
from javax.naming.directory import InitialDirContext
from javax.naming import Context
from javax.naming.directory import SearchControls
from javax.naming.directory import BasicAttributes
from wherehows.common import Constant

import csv
import re
import os
import sys
import json
from java.util import Hashtable
from java.io import FileWriter

# Carriage returns / line feeds are removed from every value read from LDAP
# before it is stored or written out.
_LINE_BREAKS = re.compile(r"\r|\n")


class LdapExtract:
    """
    Jython job that pulls users and groups out of LDAP and dumps them as CSV.

    Three files are written under ``<app_folder>/<app_id>``:
    the user records, the direct group -> member records, and the flattened
    group -> user records (nested groups expanded recursively).
    """

    # Field separator shared by all three output files.
    CSV_DELIMITER = '\x1a'

    def __init__(self, args):
        """
        Read the job configuration and make sure the output folder exists.

        :param args: mapping of job properties, indexed by the ``Constant.*_KEY`` names
        """
        self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
        self.args = args
        self.app_id = int(args[Constant.APP_ID_KEY])
        self.group_app_id = int(args[Constant.LDAP_GROUP_APP_ID_KEY])
        self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
        self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
        self.metadata_folder = self.app_folder + "/" + str(self.app_id)

        if not os.path.exists(self.metadata_folder):
            try:
                os.makedirs(self.metadata_folder)
            except Exception as e:
                # Best effort, as before: log and carry on. The exception is
                # converted to text first because the logger takes a string.
                self.logger.error(str(e))

        # user ids seen by fetch_ldap_user
        self.ldap_user = set()
        # group name -> LDAP attribute holding that group's direct members
        self.group_map = dict()
        # group name -> ordered list of all users reachable through the group
        self.group_flatten_map = dict()

    @staticmethod
    def _clean(value):
        """Drop CR/LF characters, trim surrounding whitespace and utf8-encode ``value``."""
        return _LINE_BREAKS.sub('', value).strip().encode('utf8')

    @staticmethod
    def _open_context(factory, provider_url, principal, credentials):
        """Open a directory context with the given JNDI connection settings."""
        settings = Hashtable()
        settings.put(Context.INITIAL_CONTEXT_FACTORY, factory)
        settings.put(Context.PROVIDER_URL, provider_url)
        settings.put(Context.SECURITY_PRINCIPAL, principal)
        settings.put(Context.SECURITY_CREDENTIALS, credentials)
        return InitialDirContext(settings)

    def _write_records(self, file, records):
        """Write ``records`` to ``file`` as delimiter-separated rows and close the file."""
        with open(file, "w") as out:
            csv_writer = csv.writer(out, delimiter=self.CSV_DELIMITER,
                                    quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
            csv_writer.writerows(records)

    def fetch_ldap_user(self, file):
        """
        fetch ldap user from ldap server

        Each output row is: app id, active flag ('N' when the record comes
        from the configured inactive domain, 'Y' otherwise), one column per
        configured return attribute (empty string when absent), and the
        execution id. Every user id found is also added to ``self.ldap_user``.

        :param file: output file name
        """
        # Connect to LDAP Server
        ctx = self._open_context(self.args[Constant.LDAP_CONTEXT_FACTORY_KEY],
                                 self.args[Constant.LDAP_CONTEXT_PROVIDER_URL_KEY],
                                 self.args[Constant.LDAP_CONTEXT_SECURITY_PRINCIPAL_KEY],
                                 self.args[Constant.LDAP_CONTEXT_SECURITY_CREDENTIALS_KEY])
        ldap_records = []
        try:
            # Query starting point and query target
            search_target = '(objectClass=person)'
            # Logical names, paired positionally with the configured attribute names.
            return_attributes_standard = ['user_id', 'distinct_name', 'name', 'display_name', 'title',
                                          'employee_number', 'manager', 'mail', 'department_number',
                                          'department', 'start_date', 'mobile']
            return_attributes_actual = json.loads(self.args[Constant.LDAP_SEARCH_RETURN_ATTRS_KEY])
            return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))

            ctls = SearchControls()
            ctls.setReturningAttributes(return_attributes_actual)
            ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)

            # domain format should look like : ['OU=domain1','OU=domain2','OU=domain3,OU=subdomain3']
            org_units = json.loads(self.args[Constant.LDAP_SEARCH_DOMAINS_KEY])

            for search_unit in org_units:
                search_result = ctx.search(search_unit, search_target, ctls)

                for person in search_result:
                    if search_unit == self.args[Constant.LDAP_INACTIVE_DOMAIN_KEY]:
                        is_active = 'N'
                    else:
                        is_active = 'Y'
                    ldap_user_tuple = [self.app_id, is_active]

                    person_attributes = person.getAttributes()
                    user_id = person_attributes.get(return_attributes_map['user_id'])
                    user_id = self._clean(user_id.get(0))
                    self.ldap_user.add(user_id)

                    for attr_name in return_attributes_actual:
                        attr = person_attributes.get(attr_name)
                        if attr:
                            attr = self._clean(attr.get(0))
                            # special fix for start_date: a 4-character value
                            # gets '0101' appended
                            if attr_name == return_attributes_map['start_date'] and len(attr) == 4:
                                attr += '0101'
                            ldap_user_tuple.append(attr)
                        else:
                            ldap_user_tuple.append("")

                    ldap_user_tuple.append(self.wh_exec_id)
                    ldap_records.append(ldap_user_tuple)
        finally:
            # Everything needed has been copied into ldap_records by now.
            ctx.close()

        self.logger.info("%d records found in ldap search" % (len(self.ldap_user)))
        self._write_records(file, ldap_records)

    def fetch_ldap_group(self, file):
        """
        fetch group mapping from group ldap server

        Each output row is: group app id, group name, position of the member
        within the group, app id of the member (``app_id`` when the member is
        a known user id, ``group_app_id`` otherwise), member id, execution id.
        Groups that have members are also recorded in ``self.group_map``.

        :param file: output file name
        """
        ctx = self._open_context(self.args[Constant.LDAP_GROUP_CONTEXT_FACTORY_KEY],
                                 self.args[Constant.LDAP_GROUP_CONTEXT_PROVIDER_URL_KEY],
                                 self.args[Constant.LDAP_GROUP_CONTEXT_SECURITY_PRINCIPAL_KEY],
                                 self.args[Constant.LDAP_GROUP_CONTEXT_SECURITY_CREDENTIALS_KEY])
        ldap_records = []
        try:
            search_target = "(objectClass=posixGroup)"
            # Logical names, paired positionally with the configured attribute names.
            return_attributes_standard = ['group_id', 'member_ids']
            return_attributes_actual = json.loads(self.args[Constant.LDAP_GROUP_SEARCH_RETURN_ATTRS_KEY])
            return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))

            ctls = SearchControls()
            ctls.setReturningAttributes(return_attributes_actual)
            ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)

            org_units = json.loads(self.args[Constant.LDAP_GROUP_SEARCH_DOMAINS_KEY])
            for search_unit in org_units:
                results = ctx.search(search_unit, search_target, ctls)
                for r in results:
                    group_attributes = r.getAttributes()
                    group = self._clean(group_attributes.get(return_attributes_map['group_id']).get(0))
                    # skip special group that contains all group users
                    if group == 'users':
                        continue

                    members = group_attributes.get(return_attributes_map['member_ids'])
                    if members:
                        self.group_map[group] = members
                        for sort_id, member in enumerate(members.getAll()):
                            member = self._clean(member)
                            if member in self.ldap_user:
                                member_app_id = self.app_id
                            else:
                                # not a known user id, so it is tagged with the group app id
                                member_app_id = self.group_app_id
                            ldap_records.append([self.group_app_id, group, sort_id,
                                                 member_app_id, member, self.wh_exec_id])
        finally:
            ctx.close()

        self.logger.info("%d records found in group accounts" % (len(self.group_map)))
        self._write_records(file, ldap_records)

    def fetch_ldap_group_flatten(self, file):
        """
        Flatten the group - user map by recursive extending inner-group members

        Relies on ``self.group_map`` and ``self.ldap_user`` having been filled
        by ``fetch_ldap_group`` and ``fetch_ldap_user``. Each output row is:
        group app id, group name, position, app id, user id, execution id.

        :param file: output file name
        """
        ldap_records = []
        for group in self.group_map:
            all_users = self.get_all_users_for_group(group, self.ldap_user, self.group_map, set())
            self.group_flatten_map[group] = all_users
            for sort_id, u in enumerate(all_users):
                ldap_records.append([self.group_app_id, group, sort_id,
                                     self.app_id, u, self.wh_exec_id])

        # Uses the same separator as the user and group files.
        self._write_records(file, ldap_records)

    def get_all_users_for_group(self, current, user_set, group_map, previous):
        """
        Recursive method that calculate all users for current group

        :param current: current group name
        :param user_set: the user set that contains all user ids
        :param group_map: the original group user map before extend
        :param previous: previous visited group name
        :return: ordered list of users
        """
        ret = []
        # base condition
        if current in user_set:
            ret.append(current)
            return ret

        # cyclic condition
        if current in previous:
            return ret

        # avoid duplicate computation
        if current in self.group_flatten_map:
            return self.group_flatten_map[current]

        # current is a group
        if current in group_map:
            members = group_map[current]
            previous.add(current)
            # Mirrors ret so the duplicate check stays constant-time while
            # ret keeps first-seen order.
            seen = set()
            for member in members.getAll():
                member = self._clean(member)
                next_ret = self.get_all_users_for_group(member, user_set, group_map, previous)
                for i in next_ret:
                    if i not in seen:
                        seen.add(i)
                        ret.append(i)
        return ret

    def run(self):
        """Run the three extraction steps in order: users, groups, flattened groups."""
        self.fetch_ldap_user(self.metadata_folder + "/ldap_user_record.csv")
        self.fetch_ldap_group(self.metadata_folder + "/ldap_group_record.csv")
        self.fetch_ldap_group_flatten(self.metadata_folder + "/ldap_group_flatten_record.csv")


if __name__ == "__main__":
    props = sys.argv[1]
    ldap = LdapExtract(props)
    ldap.run()