#
# Copyright 2015 LinkedIn Corp. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#

from org.slf4j import LoggerFactory
from javax.naming.directory import InitialDirContext
from javax.naming.ldap import InitialLdapContext
from javax.naming import Context
from javax.naming.directory import SearchControls
from javax.naming.directory import BasicAttributes
from javax.naming.ldap import Control
from javax.naming.ldap import PagedResultsControl
from javax.naming.ldap import PagedResultsResponseControl
from wherehows.common import Constant

import csv, re, os, sys, json
from java.util import Hashtable
from jarray import zeros, array
from java.io import FileWriter
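
# This script runs under Jython: it mixes Python stdlib modules (csv, re, os,
# sys, json) with Java classes (org.slf4j, javax.naming.*, java.util, jarray)
# imported directly from the JVM classpath.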


class LdapExtract:

    def __init__(self, args):
        self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
        self.args = args
        self.app_id = int(args[Constant.APP_ID_KEY])
        self.group_app_id = int(args[Constant.LDAP_GROUP_APP_ID_KEY])
        self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
        self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
        self.metadata_folder = self.app_folder + "/" + str(self.app_id)

        if not os.path.exists(self.metadata_folder):
            try:
                os.makedirs(self.metadata_folder)
            except Exception as e:
                self.logger.error(e)

        self.ldap_user = set()
        self.group_map = dict()
        self.group_flatten_map = dict()

    def fetch_ldap_user(self, file):
        """
        Fetch LDAP user records from the LDAP server and write them to a CSV file.
        :param file: output file name
        """

        # Set up the LDAP context options
        settings = Hashtable()
        settings.put(Context.INITIAL_CONTEXT_FACTORY, self.args[Constant.LDAP_CONTEXT_FACTORY_KEY])
        settings.put(Context.PROVIDER_URL, self.args[Constant.LDAP_CONTEXT_PROVIDER_URL_KEY])
        settings.put(Context.SECURITY_PRINCIPAL, self.args[Constant.LDAP_CONTEXT_SECURITY_PRINCIPAL_KEY])
        settings.put(Context.SECURITY_CREDENTIALS, self.args[Constant.LDAP_CONTEXT_SECURITY_CREDENTIALS_KEY])

        # Page the results; each page holds a fixed number of records
        pageSize = 5000
        pageControl = PagedResultsControl(pageSize, Control.NONCRITICAL)
        c_array = array([pageControl], Control)

        # Connect to the LDAP server
        ctx = InitialLdapContext(settings, None)
        ctx.setRequestControls(c_array)
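        # Control.NONCRITICAL asks the server to page the results if it can, but
        # to carry on with an unpaged search rather than fail if the paged-results
        # extension is unsupported.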

        # Query starting point and query target
        search_target = '(objectClass=person)'
        return_attributes_standard = ['user_id', 'distinct_name', 'name', 'display_name', 'title', 'employee_number',
                                      'manager', 'mail', 'department_number', 'department', 'start_date', 'mobile']
        return_attributes_actual = json.loads(self.args[Constant.LDAP_SEARCH_RETURN_ATTRS_KEY])
        return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))
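        # For illustration (hypothetical values): if LDAP_SEARCH_RETURN_ATTRS_KEY
        # were '["sAMAccountName", "distinguishedName", "cn", ...]', then
        # return_attributes_map['user_id'] would resolve to 'sAMAccountName',
        # return_attributes_map['distinct_name'] to 'distinguishedName', and so on.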

        ctls = SearchControls()
        ctls.setReturningAttributes(return_attributes_actual)
        ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)
        ldap_records = []

        # Domain format should look like: ['OU=domain1', 'OU=domain2', 'OU=domain3,OU=subdomain3']
        org_units = json.loads(self.args[Constant.LDAP_SEARCH_DOMAINS_KEY])

        cookie = None
        for search_unit in org_units:
            # Paginate through the results for this search unit
            while True:
                # Do the search
                search_result = ctx.search(search_unit, search_target, ctls)

                for person in search_result:
                    ldap_user_tuple = [self.app_id]
                    # Users found in the inactive domain are flagged 'N', all others 'Y'
                    if search_unit == self.args[Constant.LDAP_INACTIVE_DOMAIN_KEY]:
                        ldap_user_tuple.append('N')
                    else:
                        ldap_user_tuple.append('Y')

                    person_attributes = person.getAttributes()
                    user_id = person_attributes.get(return_attributes_map['user_id'])
                    user_id = re.sub(r"\r|\n", '', user_id.get(0)).strip().encode('utf8')
                    self.ldap_user.add(user_id)

                    for attr_name in return_attributes_actual:
                        attr = person_attributes.get(attr_name)
                        if attr:
                            attr = re.sub(r"\r|\n", '', attr.get(0)).strip().encode('utf8')
                            # Special fix for start_date: pad a bare year to a full date
                            if attr_name == return_attributes_map['start_date'] and len(attr) == 4:
                                attr += '0101'
                            ldap_user_tuple.append(attr)
                        else:
                            ldap_user_tuple.append("")

                    ldap_user_tuple.append(self.wh_exec_id)
                    ldap_records.append(ldap_user_tuple)

                # Examine the paged-results control response
                control = ctx.getResponseControls()[0]  # always a list, but with only one item
                if isinstance(control, PagedResultsResponseControl):
                    cookie = control.getCookie()

                # Re-activate paged results
                if cookie is None:
                    # Reset ctx, break the while loop, move on to the next search
                    pageControl = PagedResultsControl(pageSize, Control.NONCRITICAL)
                    c_array = array([pageControl], Control)
                    ctx.setRequestControls(c_array)
                    break
                else:
                    self.logger.debug("Have more than one page of results when searching " + search_unit)
                    pageControl = PagedResultsControl(pageSize, cookie, Control.CRITICAL)
                    c_array = array([pageControl], Control)
                    ctx.setRequestControls(c_array)
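                # Per RFC 2696 (LDAP paged results), the server returns an opaque
                # cookie with each page; sending it back in a fresh PagedResultsControl
                # requests the next page, and a null cookie means the last page
                # has been reached.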

        self.logger.info("%d records found in ldap search" % (len(self.ldap_user)))

        csv_writer = csv.writer(open(file, "w"), delimiter='\x1a', quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
        csv_writer.writerows(ldap_records)
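        # The 0x1A (ASCII SUB) delimiter is presumably chosen because user
        # attributes can contain commas, tabs, and other common separators;
        # the downstream loader must split on the same character.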

    def fetch_ldap_group(self, file):
        """
        Fetch the group-member mapping from the group LDAP server.
        :param file: output file name
        """
        settings = Hashtable()
        settings.put(Context.INITIAL_CONTEXT_FACTORY, self.args[Constant.LDAP_GROUP_CONTEXT_FACTORY_KEY])
        settings.put(Context.PROVIDER_URL, self.args[Constant.LDAP_GROUP_CONTEXT_PROVIDER_URL_KEY])
        settings.put(Context.SECURITY_PRINCIPAL, self.args[Constant.LDAP_GROUP_CONTEXT_SECURITY_PRINCIPAL_KEY])
        settings.put(Context.SECURITY_CREDENTIALS, self.args[Constant.LDAP_GROUP_CONTEXT_SECURITY_CREDENTIALS_KEY])

        ctx = InitialDirContext(settings)
        search_target = "(objectClass=posixGroup)"
        return_attributes_standard = ['group_id', 'member_ids']
        return_attributes_actual = json.loads(self.args[Constant.LDAP_GROUP_SEARCH_RETURN_ATTRS_KEY])
        return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))

        ctls = SearchControls()
        ctls.setReturningAttributes(return_attributes_actual)
        ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)

        ldap_records = []
        org_units = json.loads(self.args[Constant.LDAP_GROUP_SEARCH_DOMAINS_KEY])

        for search_unit in org_units:
            results = ctx.search(search_unit, search_target, ctls)
            for r in results:
                person_attributes = r.getAttributes()
                group = person_attributes.get(return_attributes_map['group_id']).get(0)
                group = re.sub(r"\r|\n", '', group).strip().encode('utf8')

                # Skip the special group that contains all group users
                if group == 'users':
                    continue

                members = person_attributes.get(return_attributes_map['member_ids'])
                if members:
                    self.group_map[group] = members
                    sort_id = 0
                    for member in members.getAll():
                        member = re.sub(r"\r|\n", '', member).strip().encode('utf8')
                        ldap_group_tuple = [self.group_app_id]
                        ldap_group_tuple.append(group)
                        ldap_group_tuple.append(sort_id)
                        # A member may be an individual user or another group
                        if member in self.ldap_user:
                            ldap_group_tuple.append(self.app_id)
                        else:
                            ldap_group_tuple.append(self.group_app_id)
                        ldap_group_tuple.append(member)
                        ldap_group_tuple.append(self.wh_exec_id)
                        ldap_records.append(ldap_group_tuple)
                        sort_id += 1

        self.logger.info("%d records found in group accounts" % (len(self.group_map)))

        csv_writer = csv.writer(open(file, "w"), delimiter='\x1a', quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
        csv_writer.writerows(ldap_records)

    def fetch_ldap_group_flatten(self, file):
        """
        Flatten the group-user map by recursively expanding inner-group members.
        :param file: output file name
        """
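        # For illustration (hypothetical data): if group 'eng' has members
        # ['alice', 'infra'] and group 'infra' has member 'bob', flattening
        # yields {'eng': ['alice', 'bob'], 'infra': ['bob']}.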
        ldap_records = []
        for group in self.group_map:
            all_users = self.get_all_users_for_group(group, self.ldap_user, self.group_map, set())
            self.group_flatten_map[group] = all_users
            sort_id = 0
            for u in all_users:
                ldap_group_flatten_tuple = [self.group_app_id]
                ldap_group_flatten_tuple.append(group)
                ldap_group_flatten_tuple.append(sort_id)
                ldap_group_flatten_tuple.append(self.app_id)
                ldap_group_flatten_tuple.append(u)
                ldap_group_flatten_tuple.append(self.wh_exec_id)
                ldap_records.append(ldap_group_flatten_tuple)
                sort_id += 1

        csv_writer = csv.writer(open(file, "w"), delimiter='\x1a', quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
        csv_writer.writerows(ldap_records)

    def get_all_users_for_group(self, current, user_set, group_map, previous):
        """
        Recursively collect all users that belong to the current group.
        :param current: current group name
        :param user_set: the set that contains all user ids
        :param group_map: the original group-user map, before expansion
        :param previous: set of previously visited group names
        :return: ordered list of users
        """
        ret = []

        # Base condition: current is an individual user
        if current in user_set:
            ret.append(current)
            return ret

        # Cyclic condition: this group was already visited on the current path
        if current in previous:
            return ret

        # Avoid duplicate computation
        if current in self.group_flatten_map:
            return self.group_flatten_map[current]

        # Current is a group: expand its members recursively
        if current in group_map:
            members = group_map[current]
            previous.add(current)
            for member in members.getAll():
                member = re.sub(r"\r|\n", '', member).strip().encode('utf8')
                next_ret = self.get_all_users_for_group(member, user_set, group_map, previous)
                for i in next_ret:
                    if i not in ret:
                        ret.append(i)
        return ret

    def run(self):
        self.fetch_ldap_user(self.metadata_folder + "/ldap_user_record.csv")
        self.fetch_ldap_group(self.metadata_folder + "/ldap_group_record.csv")
        self.fetch_ldap_group_flatten(self.metadata_folder + "/ldap_group_flatten_record.csv")

if __name__ == "__main__":
    props = sys.argv[1]
    ldap = LdapExtract(props)
    ldap.run()
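
# Usage sketch (assumed invocation): the surrounding WhereHows ETL framework
# passes a properties dict as sys.argv[1] when launching this Jython job; it
# must contain the Constant.* keys referenced above (app ids, execution id,
# output folder, and the LDAP connection/search settings).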