Mirror of https://github.com/datahub-project/datahub.git
Merge pull request #7 from czbernard/linkedin

Changes to the Gitorious crawler, and move the connection close calls into finally blocks in the Jython ETL scripts.

Commit 9eb7f14aa6
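Note: the recurring fix in this commit is the same in each Jython script below: the JDBC cursor and connection were closed only on the happy path, so an exception partway through a run leaked the connection. A minimal sketch of the before/after pattern (the names are illustrative, not from any one script):

    # before: the close calls are skipped whenever extract() raises
    def run(self):
        self.extract()
        self.cursor.close()
        self.con.close()

    # after: the close calls run on both the success and the failure path
    def run(self):
        try:
            self.extract()
        finally:
            self.cursor.close()
            self.con.close()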
@@ -16,6 +16,7 @@ package metadata.etl.git;
 import java.io.File;
 import java.io.InputStream;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import metadata.etl.EtlJob;
 import org.slf4j.Logger;
@@ -56,13 +57,13 @@ public class GitMetadataEtl extends EtlJob {
     }
     FileWriter fw = new FileWriter(localDir + "/" + COMMIT_OUTPUT_FILE);
     for (String project : projects) {
-      List<String> repos = GitUtil.getRepoListFromProject(GitUtil.getHttpsUrl(gitHost, project));
-      for (String repo : repos) {
-        String repoUri = GitUtil.getGitUrl(gitHost, repo);
+      Map<String, String> repos = GitUtil.getRepoListFromProject(GitUtil.getHttpsUrl(gitHost, project));
+      for (String repo : repos.keySet()) {
+        String repoUri = repos.get(repo);
         String repoDir = localDir + "/" + repo;
         GitUtil.clone(repoUri, repoDir);
-        List<GitUtil.CommitMetadata> commitMetadatas = GitUtil.getRepoMetadata(repoDir);
-        for (GitUtil.CommitMetadata m : commitMetadatas) {
+        List<GitUtil.CommitMetadata> commitMetadataList = GitUtil.getRepoMetadata(repoDir);
+        for (GitUtil.CommitMetadata m : commitMetadataList) {
           fw.append(new GitCommitRecord(m, repoUri));
         }
       }
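Note: the Gitorious change swaps the repo listing from a flat list of repo names (with the clone URL re-derived from the host) to a name-to-URL map, so each repository is cloned from the URL the project listing actually reports. A rough sketch of the new shape, with fabricated values:

    # hypothetical contents of the Map<String, String> returned by getRepoListFromProject
    repos = {
        "project/repo-a": "https://git.example.com/project/repo-a.git",
        "project/repo-b": "https://git.example.com/project/repo-b.git",
    }
    for repo in repos.keys():
        repo_uri = repos[repo]  # was: GitUtil.getGitUrl(gitHost, repo)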
@@ -65,16 +65,18 @@ class AzkabanExtract:
       print e

   def run(self):
-    self.collect_flow_jobs(self.metadata_folder + "/flow.csv", self.metadata_folder + "/job.csv", self.metadata_folder + "/dag.csv")
-    self.collect_flow_owners(self.metadata_folder + "/owner.csv")
-    self.collect_flow_schedules(self.metadata_folder + "/schedule.csv")
-    self.collect_flow_execs(self.metadata_folder + "/flow_exec.csv", self.metadata_folder + "/job_exec.csv", self.lookback_period)
-    self.az_cursor.close()
-    self.az_con.close()
+    try:
+      self.collect_flow_jobs(self.metadata_folder + "/flow.csv", self.metadata_folder + "/job.csv", self.metadata_folder + "/dag.csv")
+      self.collect_flow_owners(self.metadata_folder + "/owner.csv")
+      self.collect_flow_schedules(self.metadata_folder + "/schedule.csv")
+      self.collect_flow_execs(self.metadata_folder + "/flow_exec.csv", self.metadata_folder + "/job_exec.csv", self.lookback_period)
+    finally:
+      self.az_cursor.close()
+      self.az_con.close()

   def collect_flow_jobs(self, flow_file, job_file, dag_file):
     print "collect flow&jobs"
-    query = "SELECT f.*, p.name as project_name FROM project_flows f inner join projects p on f.project_id = p.id and f.version = p.version where p.active = 1"
+    query = "SELECT distinct f.*, p.name as project_name FROM project_flows f inner join projects p on f.project_id = p.id and f.version = p.version where p.active = 1"
     self.az_cursor.execute(query)
     rows = DbUtil.dict_cursor(self.az_cursor)
     flow_writer = FileWriter(flow_file)

@@ -89,7 +91,6 @@ class AzkabanExtract:
         unzipped_content = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(row[json_column].tostring())).read()
         try:
           row[json_column] = json.loads(unzipped_content)
-          #print json.dumps(row[json_column], indent=4)
         except:
           pass
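Note: the blob handling in this extractor is worth spelling out: Azkaban stores each flow definition as a gzip-compressed JSON blob, and the code above inflates it in memory before parsing. A self-contained sketch of that decode path; the sample blob is fabricated here for illustration:

    import gzip
    import json
    import StringIO

    # build a sample gzipped JSON blob, standing in for row[json_column]
    buf = StringIO.StringIO()
    gz = gzip.GzipFile(mode='w', fileobj=buf)
    gz.write(json.dumps({"flow": "demo", "nodes": []}))
    gz.close()
    blob = buf.getvalue()

    # the extractor's decode path: inflate in memory, then parse
    unzipped_content = gzip.GzipFile(mode='r', fileobj=StringIO.StringIO(blob)).read()
    parsed = json.loads(unzipped_content)
    print parsed["flow"]  # -> demo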
@@ -26,21 +26,23 @@ class DatasetTreeBuilder:
     jdbc_driver = args[Constant.WH_DB_DRIVER_KEY]
     jdbc_url = args[Constant.WH_DB_URL_KEY]
     conn_mysql = zxJDBC.connect(jdbc_url, username, password, jdbc_driver)
-    query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by urn"
     cur = conn_mysql.cursor()
-    cur.execute(query)
-    datasets = cur.fetchall()
-    self.dataset_dict = dict()
-    for dataset in datasets:
-      current = self.dataset_dict
-      path_arr = dataset[1].split('/')
-      for name in path_arr:
-        current = current.setdefault(name, {})
-      current["__ID_OF_DATASET__"] = dataset[0]
-    self.file_name = args[Constant.DATASET_TREE_FILE_NAME_KEY]
-    self.value = []
-    cur.close()
-    conn_mysql.close()
+    try:
+      query = "select distinct id, concat(SUBSTRING_INDEX(urn, ':///', 1), '/', SUBSTRING_INDEX(urn, ':///', -1)) p from dict_dataset order by urn"
+      cur.execute(query)
+      datasets = cur.fetchall()
+      self.dataset_dict = dict()
+      for dataset in datasets:
+        current = self.dataset_dict
+        path_arr = dataset[1].split('/')
+        for name in path_arr:
+          current = current.setdefault(name, {})
+        current["__ID_OF_DATASET__"] = dataset[0]
+      self.file_name = args[Constant.DATASET_TREE_FILE_NAME_KEY]
+      self.value = []
+    finally:
+      cur.close()
+      conn_mysql.close()

   def build_trie_helper(self, depth, path, current, current_dict):
     nodes = []
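Note: the loop above builds a trie out of nested dicts: setdefault descends one level per path segment, creating nodes on demand, and the dataset id is stored at the leaf under a sentinel key. A standalone sketch with fabricated rows:

    # fabricated (id, path) rows standing in for the dict_dataset query result
    datasets = [(1, 'hdfs/data/tracking/page_view'),
                (2, 'hdfs/data/tracking/click')]

    dataset_dict = dict()
    for dataset in datasets:
        current = dataset_dict
        for name in dataset[1].split('/'):
            current = current.setdefault(name, {})  # descend, creating nodes as needed
        current["__ID_OF_DATASET__"] = dataset[0]

    print dataset_dict['hdfs']['data']['tracking']['page_view']["__ID_OF_DATASET__"]  # -> 1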
@@ -26,27 +26,29 @@ class FlowTreeBuilder:
     jdbc_driver = args[Constant.WH_DB_DRIVER_KEY]
     jdbc_url = args[Constant.WH_DB_URL_KEY]
     conn_mysql = zxJDBC.connect(jdbc_url, username, password, jdbc_driver)
-    query = "select distinct f.flow_id, f.flow_name, f.flow_group, ca.app_code from flow f join cfg_application ca on f.app_id = ca.app_id order by app_code, flow_name"
     cur = conn_mysql.cursor()
-    cur.execute(query)
-    flows = cur.fetchall()
-    self.flow_dict = dict()
-    for flow in flows:
-      current = self.flow_dict
-      # if needed, use flow[3].replace(' ', '.')
-      current = current.setdefault(flow[3], {})
-      if flow[2] is not None:
-        current = current.setdefault(flow[2], {})
-      # for oozie
-      else:
-        current = current.setdefault('NA', {})
-      current = current.setdefault(flow[1], {})
-      current["__ID_OF_FLOW__"] = flow[0]
-    self.file_name = args[Constant.FLOW_TREE_FILE_NAME_KEY]
-    self.value = []
-    cur.close()
-    conn_mysql.close()
+    try:
+      query = "select distinct f.flow_id, f.flow_name, f.flow_group, ca.app_code from flow f join cfg_application ca on f.app_id = ca.app_id order by app_code, flow_name"
+      cur.execute(query)
+      flows = cur.fetchall()
+      self.flow_dict = dict()
+      for flow in flows:
+        current = self.flow_dict
+        # if needed, use flow[3].replace(' ', '.')
+        current = current.setdefault(flow[3], {})
+        if flow[2] is not None:
+          current = current.setdefault(flow[2], {})
+        # for oozie
+        else:
+          current = current.setdefault('NA', {})
+        current = current.setdefault(flow[1], {})
+        current["__ID_OF_FLOW__"] = flow[0]
+      self.file_name = args[Constant.FLOW_TREE_FILE_NAME_KEY]
+      self.value = []
+    finally:
+      cur.close()
+      conn_mysql.close()

   def build_trie_helper(self, depth, current, current_dict):
     nodes = []
@@ -20,22 +20,23 @@ import sys


 class GitLoad:
   def __init__(self, args):
     self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
                                  args[Constant.WH_DB_USERNAME_KEY],
                                  args[Constant.WH_DB_PASSWORD_KEY],
                                  args[Constant.WH_DB_DRIVER_KEY])
     self.wh_cursor = self.wh_con.cursor()
     self.app_id = int(args[Constant.APP_ID_KEY])

   def run(self):
-    self.load_from_stg()
-    self.wh_cursor.close()
-    self.wh_con.close()
+    try:
+      self.load_from_stg()
+    finally:
+      self.wh_cursor.close()
+      self.wh_con.close()

   def load_from_stg(self):
     query = """
     INSERT IGNORE INTO source_code_commit_info
     (
     app_id, repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email,

@@ -46,11 +47,12 @@ class GitLoad:
     from stg_source_code_commit_info s
     where s.app_id = {app_id}
     """.format(app_id=self.app_id)
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()


 if __name__ == "__main__":
   props = sys.argv[1]
   git = GitLoad(props)
   git.run()
@@ -20,16 +20,16 @@ import sys


 class OwnerTransform:
   _tables = {"source_code_commit": {"columns": "repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email, author_name, author_email, message",
                                     "file": "commit.csv",
                                     "table": "stg_source_code_commit_info"}
             }

   _clear_staging_tempalte = """
   DELETE FROM {table}
   """

   _read_file_template = """
   LOAD DATA LOCAL INFILE '{folder}/{file}'
   INTO TABLE {table}
   FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0'

@@ -39,43 +39,46 @@ class OwnerTransform:
   wh_etl_exec_id = {wh_etl_exec_id};
   """

   def __init__(self, args):
     self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
                                  args[Constant.WH_DB_USERNAME_KEY],
                                  args[Constant.WH_DB_PASSWORD_KEY],
                                  args[Constant.WH_DB_DRIVER_KEY])
     self.wh_cursor = self.wh_con.cursor()
     self.app_id = int(args[Constant.APP_ID_KEY])
     self.wh_etl_exec_id = int(args[Constant.WH_EXEC_ID_KEY])
     self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
     self.metadata_folder = self.app_folder + "/" + str(self.app_id)

   def run(self):
-    self.read_file_to_stg()
-    self.wh_cursor.close()
-    self.wh_con.close()
+    try:
+      self.read_file_to_stg()
+    finally:
+      self.wh_cursor.close()
+      self.wh_con.close()

   def read_file_to_stg(self):
     t = self._tables["source_code_commit"]

     # Clear stagging table
     query = self._clear_staging_tempalte.format(table=t.get("table"))
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()

     # Load file into stagging table
     query = self._read_file_template.format(folder=self.metadata_folder,
                                             file=t.get("file"),
                                             table=t.get("table"),
                                             columns=t.get("columns"),
                                             app_id=self.app_id,
                                             wh_etl_exec_id=self.wh_etl_exec_id)
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()


 if __name__ == "__main__":
   props = sys.argv[1]
   ot = OwnerTransform(props)
   ot.run()
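Note: read_file_to_stg() follows the warehouse's two-step staging convention: clear the staging table, then bulk-load the extractor's CSV with LOAD DATA LOCAL INFILE, using the same \x1a field separator the extract side passes to csv.writer. A sketch of how the load template expands (the folder, file, and table values are hypothetical):

    _read_file_template = """
    LOAD DATA LOCAL INFILE '{folder}/{file}'
    INTO TABLE {table}
    FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0'
    """

    print _read_file_template.format(folder="/wherehows/app/100",
                                     file="commit.csv",
                                     table="stg_source_code_commit_info")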
@@ -263,7 +263,9 @@ if __name__ == "__main__":
   l.db_id = args[Constant.DB_ID_KEY]
   l.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
   l.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
-  l.load_metadata()
-  l.load_field()
-  l.load_sample()
-  l.conn_mysql.close()
+  try:
+    l.load_metadata()
+    l.load_field()
+    l.load_sample()
+  finally:
+    l.conn_mysql.close()
@@ -24,215 +24,211 @@ from java.io import FileWriter


 class LdapExtract:
   def __init__(self, args):
     self.args = args
     self.app_id = int(args[Constant.APP_ID_KEY])
     self.group_app_id = int(args[Constant.LDAP_GROUP_APP_ID_KEY])
     self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
     self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
     self.metadata_folder = self.app_folder + "/" + str(self.app_id)
     if not os.path.exists(self.metadata_folder):
       try:
         os.makedirs(self.metadata_folder)
       except Exception as e:
         print e

     self.ldap_user = set()
     self.group_map = dict()
     self.group_flatten_map = dict()

   def split_property(self, property_value):
     return re.split('\s*\'\s*,\s*\'\s*', property_value.strip('\' \t\n\r\f\v'))

   def fetch_ldap_user(self, file):
     """
     fetch ldap user from ldap server
     :param file: output file name
     """
     # Setup LDAP Context Options
     settings = Hashtable()
     settings.put(Context.INITIAL_CONTEXT_FACTORY, self.args[Constant.LDAP_CONTEXT_FACTORY_KEY])
     settings.put(Context.PROVIDER_URL, self.args[Constant.LDAP_CONTEXT_PROVIDER_URL_KEY])
     settings.put(Context.SECURITY_PRINCIPAL, self.args[Constant.LDAP_CONTEXT_SECURITY_PRINCIPAL_KEY])
     settings.put(Context.SECURITY_CREDENTIALS, self.args[Constant.LDAP_CONTEXT_SECURITY_CREDENTIALS_KEY])

     # Connect to LDAP Server
     ctx = InitialDirContext(settings)

     # load the java Hashtable out of the ldap server
     # Query starting point and query target
     search_target = '(objectClass=person)'
     return_attributes_standard = ['user_id', 'distinct_name', 'name', 'display_name', 'title', 'employee_number', 'manager', 'mail', 'department_number', 'department', 'start_date', 'mobile']
     return_attributes_actual = self.split_property(self.args[Constant.LDAP_SEARCH_RETURN_ATTRS_KEY])
     return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))

     ctls = SearchControls()
     ctls.setReturningAttributes(return_attributes_actual)
     ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)
     ldap_records = []

     # domain format should look like : 'OU=domain1','OU=domain2','OU=domain3,OU=subdomain3'
     org_units = self.split_property(self.args[Constant.LDAP_SEARCH_DOMAINS_KEY])

     for search_unit in org_units:
       search_result = ctx.search(search_unit, search_target, ctls)

       # print search_return_attributes
       for person in search_result:
         ldap_user_tuple = [self.app_id]
         if search_unit == self.args[Constant.LDAP_INACTIVE_DOMAIN_KEY]:
           ldap_user_tuple.append('N')
         else:
           ldap_user_tuple.append('Y')
         person_attributes = person.getAttributes()
         user_id = person_attributes.get(return_attributes_map['user_id'])
         user_id = re.sub(r"\r|\n", '', user_id.get(0)).strip().encode('utf8')
         self.ldap_user.add(user_id)

         for attr_name in return_attributes_actual:
           attr = person_attributes.get(attr_name)
           if attr:
             attr = re.sub(r"\r|\n", '', attr.get(0)).strip().encode('utf8')
             # special fix for start_date
             if attr_name == return_attributes_map['start_date'] and len(attr) == 4:
               attr += '0101'
             ldap_user_tuple.append(attr)
           else:
             ldap_user_tuple.append("")

         ldap_user_tuple.append(self.wh_exec_id)
         ldap_records.append(ldap_user_tuple)

     print "%d records found in ldap search" % (len(self.ldap_user))

     csv_writer = csv.writer(open(file, "w"), delimiter='\x1a', quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
     csv_writer.writerows(ldap_records)

   def fetch_ldap_group(self, file):
     """
     fetch group mapping from group ldap server
     :param file: output file name
     """
     settings = Hashtable()
     settings.put(Context.INITIAL_CONTEXT_FACTORY, self.args[Constant.LDAP_GROUP_CONTEXT_FACTORY_KEY])
     settings.put(Context.PROVIDER_URL, self.args[Constant.LDAP_GROUP_CONTEXT_PROVIDER_URL_KEY])
     settings.put(Context.SECURITY_PRINCIPAL, self.args[Constant.LDAP_GROUP_CONTEXT_SECURITY_PRINCIPAL_KEY])
     settings.put(Context.SECURITY_CREDENTIALS, self.args[Constant.LDAP_GROUP_CONTEXT_SECURITY_CREDENTIALS_KEY])

     ctx = InitialDirContext(settings)
     search_target = "(objectClass=posixGroup)"
     return_attributes_standard = ['group_id', 'member_ids']
     return_attributes_actual = self.split_property(self.args[Constant.LDAP_GROUP_SEARCH_RETURN_ATTRS_KEY])
     return_attributes_map = dict(zip(return_attributes_standard, return_attributes_actual))
     ctls = SearchControls()
     ctls.setReturningAttributes(return_attributes_actual)
     ctls.setSearchScope(SearchControls.SUBTREE_SCOPE)

     ldap_records = []
     org_units = self.split_property(self.args[Constant.LDAP_GROUP_SEARCH_DOMAINS_KEY])
     for search_unit in org_units:
       results = ctx.search(search_unit, search_target, ctls)
       for r in results:
         person_attributes = r.getAttributes()
         group = person_attributes.get(return_attributes_map['group_id']).get(0)
         group = re.sub(r"\r|\n", '', group).strip().encode('utf8')
         # skip special group that contains all group users
         if group == 'users':
           continue
         members = person_attributes.get(return_attributes_map['member_ids'])
         if members:
           self.group_map[group] = members
           sort_id = 0
           for member in members.getAll():
             member = re.sub(r"\r|\n", '', member).strip().encode('utf8')
             ldap_group_tuple = [self.group_app_id]
             ldap_group_tuple.append(group)
             ldap_group_tuple.append(sort_id)
             if member in self.ldap_user:
               ldap_group_tuple.append(self.app_id)
             else:
               ldap_group_tuple.append(self.group_app_id)
             ldap_group_tuple.append(member)
             ldap_group_tuple.append(self.wh_exec_id)
             ldap_records.append(ldap_group_tuple)
             sort_id += 1
         else:
           pass
     print "%d records found in group accounts" % (len(self.group_map))

     csv_writer = csv.writer(open(file, "w"), delimiter='\x1a', quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
     csv_writer.writerows(ldap_records)

   def fetch_ldap_group_flatten(self, file):
     """
     Flatten the group - user map by recursively extending inner-group members
     :param file: output file name
     """
     ldap_records = []
     for group in self.group_map:
       all_users = self.get_all_users_for_group(group, self.ldap_user, self.group_map, set())
       self.group_flatten_map[group] = all_users
       sort_id = 0
       for u in all_users:
         ldap_group_flatten_tuple = [self.group_app_id]
         ldap_group_flatten_tuple.append(group)
         ldap_group_flatten_tuple.append(sort_id)
         ldap_group_flatten_tuple.append(self.app_id)
         ldap_group_flatten_tuple.append(u)
         ldap_group_flatten_tuple.append(self.wh_exec_id)
         ldap_records.append(ldap_group_flatten_tuple)
         sort_id += 1

     csv_writer = csv.writer(open(file, "w"), delimiter='\x1a', quoting=csv.QUOTE_MINIMAL, lineterminator="\n")
     csv_writer.writerows(ldap_records)

   def get_all_users_for_group(self, current, user_set, group_map, previous):
     """
     Recursive method that calculates all users for the current group
     :param current: current group name
     :param user_set: the user set that contains all user ids
     :param group_map: the original group user map before extending
     :param previous: previously visited group names
     :return: ordered list of users
     """
     ret = []
     # base condition
     if current in user_set:
       ret.append(current)
       return ret

     # cyclic condition
     if current in previous:
       return ret

     # avoid duplicate computation
     if current in self.group_flatten_map:
       return self.group_flatten_map[current]

     # current is a group
     if current in group_map:
       members = group_map[current]
       previous.add(current)
       for member in members.getAll():
         member = re.sub(r"\r|\n", '', member).strip().encode('utf8')
         next_ret = self.get_all_users_for_group(member, user_set, group_map, previous)
         for i in next_ret:
           if i not in ret:
             ret.append(i)
     return ret

   def run(self):
     self.fetch_ldap_user(self.metadata_folder + "/ldap_user_record.csv")
     self.fetch_ldap_group(self.metadata_folder + "/ldap_group_record.csv")
     self.fetch_ldap_group_flatten(self.metadata_folder + "/ldap_group_flatten_record.csv")


 if __name__ == "__main__":
   props = sys.argv[1]
   ldap = LdapExtract(props)
   ldap.run()
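Note: get_all_users_for_group() resolves nested LDAP groups into a flat, ordered user list: the previous set guards against membership cycles, and already-flattened groups are reused as a memo. A plain-Python sketch of the same traversal over fabricated data (the real code walks javax.naming attribute sets instead of lists, and the memo is filled by the caller):

    user_set = set(['alice', 'bob'])
    group_map = {'eng': ['alice', 'web'],  # 'web' is a nested group
                 'web': ['bob', 'eng']}    # and cycles back to 'eng'
    flatten_memo = dict()

    def get_all_users_for_group(current, previous):
        ret = []
        if current in user_set:          # base condition: a real user
            return [current]
        if current in previous:          # cyclic condition
            return ret
        if current in flatten_memo:      # avoid duplicate computation
            return flatten_memo[current]
        if current in group_map:         # current is a group
            previous.add(current)
            for member in group_map[current]:
                for i in get_all_users_for_group(member, previous):
                    if i not in ret:     # keep order, drop duplicates
                        ret.append(i)
        flatten_memo[current] = ret
        return ret

    print get_all_users_for_group('eng', set())  # -> ['alice', 'bob']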
@@ -20,24 +20,25 @@ import sys


 class LdapLoad:
   def __init__(self, args):
     self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
                                  args[Constant.WH_DB_USERNAME_KEY],
                                  args[Constant.WH_DB_PASSWORD_KEY],
                                  args[Constant.WH_DB_DRIVER_KEY])
     self.wh_cursor = self.wh_con.cursor()
     self.app_id = int(args[Constant.APP_ID_KEY])
     self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
     self.metadata_folder = self.app_folder + "/" + str(self.app_id)

   def run(self):
-    self.load_from_stg()
-    self.wh_cursor.close()
-    self.wh_con.close()
+    try:
+      self.load_from_stg()
+    finally:
+      self.wh_cursor.close()
+      self.wh_con.close()

   def load_from_stg(self):
     query = """
     INSERT INTO dir_external_user_info
     (
     app_id, user_id, urn, full_name, display_name, title, employee_number,

@@ -69,11 +70,11 @@ class LdapLoad:
     modified_time = unix_timestamp(NOW()),
     wh_etl_exec_id = s.wh_etl_exec_id
     """
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()

     query = """
     INSERT INTO dir_external_group_user_map
     (app_id, group_id, sort_id, user_app_id, user_id, created_time, wh_etl_exec_id)
     SELECT app_id, group_id, sort_id, user_app_id, user_id, unix_timestamp(NOW()), wh_etl_exec_id

@@ -82,11 +83,11 @@ class LdapLoad:
     modified_time = unix_timestamp(NOW()),
     wh_etl_exec_id = s.wh_etl_exec_id
     """
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()

     query = """
     INSERT INTO dir_external_group_user_map_flatten
     (app_id, group_id, sort_id, user_app_id, user_id, created_time, wh_etl_exec_id)
     SELECT app_id, group_id, sort_id, user_app_id, user_id, unix_timestamp(NOW()), wh_etl_exec_id

@@ -95,11 +96,12 @@ class LdapLoad:
     modified_time = unix_timestamp(NOW()),
     wh_etl_exec_id = s.wh_etl_exec_id
     """
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()


 if __name__ == "__main__":
   props = sys.argv[1]
   lt = LdapLoad(props)
   lt.run()
@@ -20,31 +20,32 @@ import sys


 class LdapTransform:
-  _tables = {"ldap_user": {"columns": "app_id, is_active, user_id, urn, full_name, display_name, title, employee_number, manager_urn, email, department_id, department_name, start_date, mobile_phone, wh_etl_exec_id",
+  _tables = {"ldap_user": {
+    "columns": "app_id, is_active, user_id, urn, full_name, display_name, title, employee_number, manager_urn, email, department_id, department_name, start_date, mobile_phone, wh_etl_exec_id",
     "file": "ldap_user_record.csv",
     "table": "stg_dir_external_user_info",
     "nullif_columns":
       {"department_id": "''",
        "employee_number": 0,
        "start_date": "'0000-00-00'",
        "manager_urn": "''",
        "department_name": "''",
        "mobile_phone": "''",
        "email": "''",
        "title": "''"}
     },
     "ldap_group": {"columns": "app_id, group_id, sort_id, user_app_id, user_id, wh_etl_exec_id",
                    "file": "ldap_group_record.csv",
                    "table": "stg_dir_external_group_user_map",
                    "nullif_columns": {"user_id": "''"}
                   },
     "ldap_group_flatten": {"columns": "app_id, group_id, sort_id, user_app_id, user_id, wh_etl_exec_id",
                            "file": "ldap_group_flatten_record.csv",
                            "table": "stg_dir_external_group_user_map_flatten"
                           }
   }

   _read_file_template = """
   LOAD DATA LOCAL INFILE '{folder}/{file}'
   INTO TABLE {table}
   FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0'

@@ -52,13 +53,13 @@ class LdapTransform:
   ({columns});
   """

   _update_column_to_null_template = """
   UPDATE {table} stg
   SET {column} = NULL
   WHERE {column} = {column_value} and app_id = {app_id}
   """

   _update_manager_info = """
   update {table} stg
   join (select t1.app_id, t1.user_id, t1.employee_number, t2.user_id as manager_user_id, t2.employee_number as manager_employee_number from
   {table} t1 join {table} t2 on t1.manager_urn = t2.urn and t1.app_id = t2.app_id

@@ -69,138 +70,142 @@ class LdapTransform:
   WHERE stg.app_id = {app_id}
   """

   _get_manager_edge = """
   select user_id, manager_user_id from {table} stg
   where app_id = {app_id}
   """

   _update_hierarchy_info = """
   update {table} stg
   set org_hierarchy = CASE {org_hierarchy_long_string} END,
   org_hierarchy_depth = CASE {org_hierarchy_depth_long_string} END
   where app_id = {app_id} and user_id in ({user_ids})
   """

   _update_hierarchy_info_per_row = """
   update {table} stg
   set org_hierarchy = '{org_hierarchy}',
   org_hierarchy_depth = {org_hierarchy_depth}
   where app_id = {app_id} and user_id = '{user_id}'
   """

   _clear_staging_tempalte = """
   DELETE FROM {table} where app_id = {app_id}
   """

   def __init__(self, args):
     self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
                                  args[Constant.WH_DB_USERNAME_KEY],
                                  args[Constant.WH_DB_PASSWORD_KEY],
                                  args[Constant.WH_DB_DRIVER_KEY])
     self.wh_cursor = self.wh_con.cursor()
     self.app_id = int(args[Constant.APP_ID_KEY])
     self.group_app_id = int(args[Constant.LDAP_GROUP_APP_ID_KEY])
     self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
     self.metadata_folder = self.app_folder + "/" + str(self.app_id)
     self.ceo_user_id = args[Constant.LDAP_CEO_USER_ID_KEY]

   def run(self):
-    self.read_file_to_stg()
-    self.update_null_value()
-    self.update_manager_info()
-    self.update_hierarchy_info()
-    self.wh_cursor.close()
-    self.wh_con.close()
+    try:
+      self.read_file_to_stg()
+      self.update_null_value()
+      self.update_manager_info()
+      self.update_hierarchy_info()
+    finally:
+      self.wh_cursor.close()
+      self.wh_con.close()

   def read_file_to_stg(self):
     for table in self._tables:
       t = self._tables[table]
       # Clear stagging table
       query = self._clear_staging_tempalte.format(table=t.get("table"), app_id=self.app_id)
       print query
       self.wh_cursor.execute(query)
       self.wh_con.commit()

       # Load file into stagging table
       query = self._read_file_template.format(folder=self.metadata_folder, file=t.get("file"), table=t.get("table"), columns=t.get("columns"))
       print query
       self.wh_cursor.execute(query)
       self.wh_con.commit()

   def update_null_value(self):
     for table in self._tables:
       t = self._tables[table]
       if 'nullif_columns' in t:
         for column in t['nullif_columns']:
           query = self._update_column_to_null_template.format(table=t.get("table"), column=column, column_value=t['nullif_columns'][column], app_id=self.app_id)
           print query
           self.wh_cursor.execute(query)
           self.wh_con.commit()

   def update_manager_info(self):
     t = self._tables["ldap_user"]
     query = self._update_manager_info.format(table=t.get("table"), app_id=self.app_id)
     print query
     self.wh_cursor.execute(query)
     self.wh_con.commit()

   def update_hierarchy_info(self):
     t = self._tables["ldap_user"]
     query = self._get_manager_edge.format(table=t.get("table"), app_id=self.app_id)
     print query
     self.wh_cursor.execute(query)
     pair = dict()
     hierarchy = dict()

     for row in self.wh_cursor:
       pair[row[0]] = row[1]

     for user in pair:
       self.find_path_for_user(user, pair, hierarchy)

     case_org_hierarchy_template = " WHEN user_id = '{user_id}' THEN '{org_hierarchy}' "
     case_org_hierarchy_depth_template = " WHEN user_id = '{user_id}' THEN {org_hierarchy_depth} "
     user_ids = []
     org_hierarchy_long_string = ""
     org_hierarchy_depth_long_string = ""
     count = 0
     for user in hierarchy:
       if hierarchy[user] is not None:
         user_ids.append("'" + user + "'")
         org_hierarchy_long_string += case_org_hierarchy_template.format(user_id=user, org_hierarchy=hierarchy[user][0])
         org_hierarchy_depth_long_string += case_org_hierarchy_depth_template.format(user_id=user, org_hierarchy_depth=hierarchy[user][1])
         count += 1
         if count % 1000 == 0:
-          query = self._update_hierarchy_info.format(table=t.get("table"), app_id=self.app_id, user_ids=",".join(user_ids), org_hierarchy_long_string=org_hierarchy_long_string, org_hierarchy_depth_long_string=org_hierarchy_depth_long_string)
+          query = self._update_hierarchy_info.format(table=t.get("table"), app_id=self.app_id, user_ids=",".join(user_ids), org_hierarchy_long_string=org_hierarchy_long_string,
+                                                     org_hierarchy_depth_long_string=org_hierarchy_depth_long_string)
           # print query
           self.wh_cursor.executemany(query)
           user_ids = []
           org_hierarchy_long_string = ""
           org_hierarchy_depth_long_string = ""
     self.wh_con.commit()

   def find_path_for_user(self, start, pair, hierarchy):
     if start in hierarchy:
       return hierarchy[start]

     if start == self.ceo_user_id:
       return "/" + start, 0

     if start is None:
       return None

     next = self.find_path_for_user(pair[start], pair, hierarchy)

     if next:
       current = next[0] + "/" + start, next[1] + 1
     else:
       current = None

     hierarchy[start] = current
     return current


 if __name__ == "__main__":
   props = sys.argv[1]
   lt = LdapTransform(props)
   lt.run()
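Note: update_hierarchy_info() pulls one user-to-manager edge map, and find_path_for_user() walks each chain up to the configured CEO id, memoizing results in hierarchy so shared manager prefixes are computed once; every 1000 resolved users the paths are flushed in a single batched CASE ... WHEN update. A standalone sketch of the path computation over a fabricated edge map (the guard for ids missing from the map is an addition for the sketch):

    ceo_user_id = 'ceo'
    pair = {'alice': 'bob', 'bob': 'ceo', 'carol': 'bob'}  # user -> manager
    hierarchy = dict()

    def find_path_for_user(start):
        if start in hierarchy:
            return hierarchy[start]
        if start == ceo_user_id:
            return '/' + start, 0
        if start is None or start not in pair:
            return None
        next_path = find_path_for_user(pair[start])
        if next_path:
            current = next_path[0] + '/' + start, next_path[1] + 1
        else:
            current = None
        hierarchy[start] = current
        return current

    for user in pair:
        find_path_for_user(user)
    print hierarchy['alice']  # -> ('/ceo/bob/alice', 2)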
@@ -63,15 +63,17 @@ class OozieExtract:
     print "Oozie version: ", self.oz_version[0]

   def run(self):
-    self.collect_flow_jobs(self.metadata_folder + "/flow.csv",
-                           self.metadata_folder + "/job.csv",
-                           self.metadata_folder + "/dag.csv")
-    self.collect_flow_owners(self.metadata_folder + "/owner.csv")
-    self.collect_flow_schedules(self.metadata_folder + "/schedule.csv")
-    self.collect_flow_execs(self.metadata_folder + "/flow_exec.csv", self.lookback_period)
-    self.collect_job_execs(self.metadata_folder + "/job_exec.csv", self.lookback_period)
-    self.oz_cursor.close()
-    self.oz_con.close()
+    try:
+      self.collect_flow_jobs(self.metadata_folder + "/flow.csv",
+                             self.metadata_folder + "/job.csv",
+                             self.metadata_folder + "/dag.csv")
+      self.collect_flow_owners(self.metadata_folder + "/owner.csv")
+      self.collect_flow_schedules(self.metadata_folder + "/schedule.csv")
+      self.collect_flow_execs(self.metadata_folder + "/flow_exec.csv", self.lookback_period)
+      self.collect_job_execs(self.metadata_folder + "/job_exec.csv", self.lookback_period)
+    finally:
+      self.oz_cursor.close()
+      self.oz_con.close()

   def collect_flow_jobs(self, flow_file, job_file, dag_file):
     print "collect flow&jobs"
@ -22,25 +22,48 @@ import sys
|
|||||||
|
|
||||||
|
|
||||||
class OwnerLoad:
|
class OwnerLoad:
|
||||||
|
def __init__(self, args):
|
||||||
|
self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
|
||||||
|
args[Constant.WH_DB_USERNAME_KEY],
|
||||||
|
args[Constant.WH_DB_PASSWORD_KEY],
|
||||||
|
args[Constant.WH_DB_DRIVER_KEY])
|
||||||
|
self.wh_cursor = self.wh_con.cursor()
|
||||||
|
self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
|
||||||
|
self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
|
||||||
|
|
||||||
def __init__(self, args):
|
def run(self):
|
||||||
self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
|
try:
|
||||||
args[Constant.WH_DB_USERNAME_KEY],
|
cmd = """
|
||||||
args[Constant.WH_DB_PASSWORD_KEY],
|
INSERT INTO dataset_owner (dataset_id, dataset_urn, owner_id, sort_id, namespace, app_id, owner_type, owner_sub_type, db_ids, is_group, is_active, source_time, created_time, wh_etl_exec_id)
|
||||||
args[Constant.WH_DB_DRIVER_KEY])
|
SELECT * FROM (SELECT dataset_id, dataset_urn, owner_id, sort_id, namespace, app_id, owner_type, owner_sub_type, group_concat(db_id ORDER BY db_id SEPARATOR ",") db_ids, is_group, is_active, source_time, unix_timestamp(NOW()) time_created, {wh_etl_exec_id}
|
||||||
self.wh_cursor = self.wh_con.cursor()
|
FROM stg_dataset_owner s
|
||||||
self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
|
WHERE s.dataset_id is not null and s.owner_id is not null and s.owner_id != '' and s.app_id is not null
|
||||||
self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
|
GROUP BY s.dataset_id, s.owner_id, s.sort_id, s.namespace, s.owner_type, s.owner_sub_type) sb
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
dataset_urn = sb.dataset_urn,
|
||||||
|
sort_id = COALESCE(@sort_id, sb.sort_id),
|
||||||
|
owner_type = COALESCE(@owner_type, sb.owner_type),
|
||||||
|
owner_sub_type = COALESCE(@owner_sub_type, sb.owner_sub_type),
|
||||||
|
app_id = sb.app_id,
|
||||||
|
is_active = sb.is_active,
|
||||||
|
db_ids = sb.db_ids,
|
||||||
|
source_time = sb.source_time,
|
||||||
|
wh_etl_exec_id = {wh_etl_exec_id},
|
||||||
|
modified_time = unix_timestamp(NOW())
|
||||||
|
""".format(wh_etl_exec_id=self.wh_exec_id)
|
||||||
|
print cmd
|
||||||
|
self.wh_cursor.execute(cmd)
|
||||||
|
self.wh_con.commit()
|
||||||
|
|
||||||
def run(self):
|
# matching parent level urns
|
||||||
cmd = """
|
template = """
|
||||||
INSERT INTO dataset_owner (dataset_id, dataset_urn, owner_id, sort_id, namespace, app_id, owner_type, owner_sub_type, db_ids, is_group, is_active, source_time, created_time, wh_etl_exec_id)
|
INSERT INTO dataset_owner (dataset_id, dataset_urn, owner_id, sort_id, namespace, app_id, owner_type, owner_sub_type, db_ids, is_group, is_active, source_time, created_time, wh_etl_exec_id)
|
||||||
SELECT * FROM (SELECT dataset_id, dataset_urn, owner_id, sort_id, namespace, app_id, owner_type, owner_sub_type, group_concat(db_id ORDER BY db_id SEPARATOR ",") db_ids, is_group, is_active, source_time, unix_timestamp(NOW()) time_created, {wh_etl_exec_id}
|
select * FROM (select distinct d.id, d.urn, s.owner_id, s.sort_id, s.namespace, s.app_id, s.owner_type, owner_sub_type, group_concat(s.db_id ORDER BY db_id SEPARATOR ",") db_ids, s.is_group, s.is_active, s.source_time, unix_timestamp(NOW()) time_created, {wh_etl_exec_id}
|
||||||
FROM stg_dataset_owner s
|
from stg_dataset_owner s join dict_dataset d on s.dataset_urn = substring(d.urn, 1, char_length(d.urn) - char_length(substring_index(d.urn, '/', -{lvl})) - 1)
|
||||||
WHERE s.dataset_id is not null and s.owner_id is not null and s.owner_id != '' and s.app_id is not null
|
WHERE s.owner_id is not null and s.owner_id != '' and s.app_id is not null
|
||||||
GROUP BY s.dataset_id, s.owner_id, s.sort_id, s.namespace, s.owner_type, s.owner_sub_type) sb
|
group by d.id, s.owner_id, s.sort_id, s.namespace, s.owner_type, s.owner_sub_type) sb
|
||||||
ON DUPLICATE KEY UPDATE
|
ON DUPLICATE KEY UPDATE
|
||||||
dataset_urn = sb.dataset_urn,
|
dataset_urn = sb.urn,
|
||||||
sort_id = COALESCE(@sort_id, sb.sort_id),
|
sort_id = COALESCE(@sort_id, sb.sort_id),
|
||||||
owner_type = COALESCE(@owner_type, sb.owner_type),
|
owner_type = COALESCE(@owner_type, sb.owner_type),
|
||||||
owner_sub_type = COALESCE(@owner_sub_type, sb.owner_sub_type),
|
owner_sub_type = COALESCE(@owner_sub_type, sb.owner_sub_type),
|
||||||
@ -50,68 +73,46 @@ class OwnerLoad:
|
|||||||
source_time = sb.source_time,
|
source_time = sb.source_time,
|
||||||
wh_etl_exec_id = {wh_etl_exec_id},
|
wh_etl_exec_id = {wh_etl_exec_id},
|
||||||
modified_time = unix_timestamp(NOW())
|
modified_time = unix_timestamp(NOW())
|
||||||
""".format(wh_etl_exec_id=self.wh_exec_id)
|
"""
|
||||||
|
|
||||||
|
for l in range(1, 6):
|
||||||
|
cmd = template.format(wh_etl_exec_id=self.wh_exec_id, lvl=l)
|
||||||
print cmd
|
print cmd
|
||||||
self.wh_cursor.execute(cmd)
|
self.wh_cursor.execute(cmd)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
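The join condition above derives a parent URN by trimming the trailing {lvl} path segments: substring_index(d.urn, '/', -{lvl}) returns the last {lvl} segments, and the outer substring drops them along with the separating '/'. The loop below the template retries the match for one to five levels up. The same truncation in Python (the URN is a made-up example):

def parent_urn(urn, lvl):
    # drop the last `lvl` '/'-separated segments, as the SQL expression does
    return urn.rsplit('/', lvl)[0]

print parent_urn('hdfs:///data/tracking/PageViewEvent/daily', 1)
# hdfs:///data/tracking/PageViewEvent
print parent_urn('hdfs:///data/tracking/PageViewEvent/daily', 2)
# hdfs:///data/tracking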
||||||
# matching parent-level URNs
|
# put all unmatched datasets into another table for future reference
|
||||||
template = """
|
|
||||||
INSERT INTO dataset_owner (dataset_id, dataset_urn, owner_id, sort_id, namespace, app_id, owner_type, owner_sub_type, db_ids, is_group, is_active, source_time, created_time, wh_etl_exec_id)
|
|
||||||
select * FROM (select distinct d.id, d.urn, s.owner_id, s.sort_id, s.namespace, s.app_id, s.owner_type, owner_sub_type, group_concat(s.db_id ORDER BY db_id SEPARATOR ",") db_ids, s.is_group, s.is_active, s.source_time, unix_timestamp(NOW()) time_created, {wh_etl_exec_id}
|
|
||||||
from stg_dataset_owner s join dict_dataset d on s.dataset_urn = substring(d.urn, 1, char_length(d.urn) - char_length(substring_index(d.urn, '/', -{lvl})) - 1)
|
|
||||||
WHERE s.owner_id is not null and s.owner_id != '' and s.app_id is not null
|
|
||||||
group by d.id, s.owner_id, s.sort_id, s.namespace, s.owner_type, s.owner_sub_type) sb
|
|
||||||
ON DUPLICATE KEY UPDATE
|
|
||||||
dataset_urn = sb.urn,
|
|
||||||
sort_id = COALESCE(@sort_id, sb.sort_id),
|
|
||||||
owner_type = COALESCE(@owner_type, sb.owner_type),
|
|
||||||
owner_sub_type = COALESCE(@owner_sub_type, sb.owner_sub_type),
|
|
||||||
app_id = sb.app_id,
|
|
||||||
is_active = sb.is_active,
|
|
||||||
db_ids = sb.db_ids,
|
|
||||||
source_time = sb.source_time,
|
|
||||||
wh_etl_exec_id = {wh_etl_exec_id},
|
|
||||||
modified_time = unix_timestamp(NOW())
|
|
||||||
"""
|
|
||||||
|
|
||||||
for l in range(1, 6):
|
cmd = """
|
||||||
cmd = template.format(wh_etl_exec_id=self.wh_exec_id, lvl=l)
|
INSERT INTO stg_dataset_owner_unmatched (dataset_urn, owner_id, sort_id, app_id, namespace, owner_type, owner_sub_type, is_group, db_name, db_id, is_active, source_time)
|
||||||
print cmd
|
SELECT dataset_urn, owner_id, sort_id, app_id, namespace, owner_type, owner_sub_type, is_group, db_name, db_id, is_active, source_time
|
||||||
self.wh_cursor.execute(cmd)
|
FROM stg_dataset_owner s where dataset_id is null and is_parent_urn = 'N'
|
||||||
self.wh_con.commit()
|
ON DUPLICATE KEY UPDATE
|
||||||
|
sort_id = s.sort_id,
|
||||||
|
owner_type = s.owner_type,
|
||||||
|
owner_sub_type = s.owner_sub_type,
|
||||||
|
is_active = s.is_active,
|
||||||
|
source_time = s.source_time;
|
||||||
|
"""
|
||||||
|
self.wh_cursor.execute(cmd)
|
||||||
|
self.wh_con.commit()
|
||||||
|
|
||||||
# put all unmatched datasets into another table for future reference
|
# delete the entries that matched a dataset id in this round
|
||||||
|
|
||||||
cmd = """
|
cmd = """
|
||||||
INSERT INTO stg_dataset_owner_unmatched (dataset_urn, owner_id, sort_id, app_id, namespace, owner_type, owner_sub_type, is_group, db_name, db_id, is_active, source_time)
|
DELETE u FROM stg_dataset_owner_unmatched u
|
||||||
SELECT dataset_urn, owner_id, sort_id, app_id, namespace, owner_type, owner_sub_type, is_group, db_name, db_id, is_active, source_time
|
JOIN (SELECT DISTINCT dataset_urn, dataset_id FROM stg_dataset_owner) s
|
||||||
FROM stg_dataset_owner s where dataset_id is null and is_parent_urn = 'N'
|
ON u.dataset_urn = s.dataset_urn
|
||||||
ON DUPLICATE KEY UPDATE
|
WHERE s.dataset_id IS NOT NULL;
|
||||||
sort_id = s.sort_id,
|
"""
|
||||||
owner_type = s.owner_type,
|
self.wh_cursor.execute(cmd)
|
||||||
owner_sub_type = s.owner_sub_type,
|
self.wh_con.commit()
|
||||||
is_active = s.is_active,
|
finally:
|
||||||
source_time = s.source_time;
|
self.wh_cursor.close()
|
||||||
"""
|
self.wh_con.close()
|
||||||
self.wh_cursor.execute(cmd)
|
|
||||||
self.wh_con.commit()
|
|
||||||
|
|
||||||
# delete the entries that matched a dataset id in this round
|
|
||||||
|
|
||||||
cmd = """
|
|
||||||
DELETE u FROM stg_dataset_owner_unmatched u
|
|
||||||
JOIN (SELECT DISTINCT dataset_urn, dataset_id FROM stg_dataset_owner) s
|
|
||||||
ON u.dataset_urn = s.dataset_urn
|
|
||||||
WHERE s.dataset_id IS NOT NULL;
|
|
||||||
"""
|
|
||||||
self.wh_cursor.execute(cmd)
|
|
||||||
self.wh_con.commit()
|
|
||||||
|
|
||||||
self.wh_cursor.close()
|
|
||||||
self.wh_con.close()
|
|
||||||
|
|
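Taken together, the two statements above implement a park-and-purge retry queue: staging rows that could not be resolved to a dataset_id are parked in stg_dataset_owner_unmatched (with ON DUPLICATE KEY UPDATE refreshing rows already parked by an earlier run), and any parked row whose URN finally resolves is purged. A condensed sketch of the purge step against the same tables:

def purge_matched(wh_cursor, wh_con):
    # drop parked owner rows whose URN resolved to a dataset this round
    wh_cursor.execute(
        "DELETE u FROM stg_dataset_owner_unmatched u "
        "JOIN (SELECT DISTINCT dataset_urn, dataset_id FROM stg_dataset_owner) s "
        "ON u.dataset_urn = s.dataset_urn "
        "WHERE s.dataset_id IS NOT NULL")
    wh_con.commit()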
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
props = sys.argv[1]
|
props = sys.argv[1]
|
||||||
ot = OwnerLoad(props)
|
ot = OwnerLoad(props)
|
||||||
ot.run()
|
ot.run()
|
||||||
|
@ -20,16 +20,16 @@ import sys
|
|||||||
|
|
||||||
|
|
||||||
class OwnerTransform:
|
class OwnerTransform:
|
||||||
_tables = {"dataset_owner": {"columns": "dataset_urn, owner_id, sort_id, namespace, db_name, source_time",
|
_tables = {"dataset_owner": {"columns": "dataset_urn, owner_id, sort_id, namespace, db_name, source_time",
|
||||||
"file": "dataset_owner.csv",
|
"file": "dataset_owner.csv",
|
||||||
"table": "stg_dataset_owner"}
|
"table": "stg_dataset_owner"}
|
||||||
}
|
}
|
||||||
|
|
||||||
_clear_staging_template = """
|
_clear_staging_template = """
|
||||||
DELETE FROM {table}
|
DELETE FROM {table}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_read_file_template = """
|
_read_file_template = """
|
||||||
LOAD DATA LOCAL INFILE '{folder}/{file}'
|
LOAD DATA LOCAL INFILE '{folder}/{file}'
|
||||||
INTO TABLE {table}
|
INTO TABLE {table}
|
||||||
FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0'
|
FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0'
|
||||||
@ -37,21 +37,21 @@ class OwnerTransform:
|
|||||||
({columns});
|
({columns});
|
||||||
"""
|
"""
|
||||||
|
|
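The LOAD DATA LOCAL INFILE template expects fields terminated by '\x1a' (the Ctrl-Z control character), a separator chosen because it is unlikely to appear inside URNs or owner names. Any writer on the extract side has to emit matching rows; a minimal sketch, assuming the column order of the _tables["dataset_owner"] entry above:

SEP = '\x1a'  # must match FIELDS TERMINATED BY in _read_file_template

def write_owner_row(fw, dataset_urn, owner_id, sort_id, namespace, db_name, source_time):
    # column order mirrors "dataset_urn, owner_id, sort_id, namespace, db_name, source_time"
    fields = [dataset_urn, owner_id, str(sort_id), namespace, db_name, str(source_time)]
    fw.write(SEP.join(fields) + '\n')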
||||||
_update_dataset_id_template = """
|
_update_dataset_id_template = """
|
||||||
UPDATE {table} stg
|
UPDATE {table} stg
|
||||||
JOIN dict_dataset dd
|
JOIN dict_dataset dd
|
||||||
ON stg.dataset_urn = dd.urn
|
ON stg.dataset_urn = dd.urn
|
||||||
SET stg.dataset_id = dd.id
|
SET stg.dataset_id = dd.id
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_update_database_id_template = """
|
_update_database_id_template = """
|
||||||
UPDATE {table} stg
|
UPDATE {table} stg
|
||||||
JOIN cfg_database cd
|
JOIN cfg_database cd
|
||||||
ON stg.db_name = cd.db_code
|
ON stg.db_name = cd.db_code
|
||||||
SET stg.db_id = cd.db_id
|
SET stg.db_id = cd.db_id
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_update_app_id_template = """
|
_update_app_id_template = """
|
||||||
UPDATE {table} stg
|
UPDATE {table} stg
|
||||||
join dir_external_user_info ldap
|
join dir_external_user_info ldap
|
||||||
on stg.owner_id = ldap.user_id
|
on stg.owner_id = ldap.user_id
|
||||||
@ -60,7 +60,7 @@ class OwnerTransform:
|
|||||||
stg.is_active = ldap.is_active
|
stg.is_active = ldap.is_active
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_update_group_app_id_template = """
|
_update_group_app_id_template = """
|
||||||
UPDATE {table} stg
|
UPDATE {table} stg
|
||||||
join dir_external_group_user_map ldap
|
join dir_external_group_user_map ldap
|
||||||
on stg.owner_id = ldap.group_id
|
on stg.owner_id = ldap.group_id
|
||||||
@ -69,7 +69,7 @@ class OwnerTransform:
|
|||||||
stg.is_active = 'Y'
|
stg.is_active = 'Y'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_update_owner_type_template = """
|
_update_owner_type_template = """
|
||||||
UPDATE {table} stg
|
UPDATE {table} stg
|
||||||
join dir_external_user_info ldap
|
join dir_external_user_info ldap
|
||||||
on stg.owner_id = ldap.user_id
|
on stg.owner_id = ldap.user_id
|
||||||
@ -77,88 +77,91 @@ class OwnerTransform:
|
|||||||
stg.owner_sub_type = CASE WHEN ldap.department_id = 4020 THEN 'DWH' ELSE 'BA' END
|
stg.owner_sub_type = CASE WHEN ldap.department_id = 4020 THEN 'DWH' ELSE 'BA' END
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_update_parent_flag = """
|
_update_parent_flag = """
|
||||||
update {table} s
|
update {table} s
|
||||||
join dict_dataset d on s.dataset_urn = substring(d.urn, 1, char_length(d.urn) - char_length(substring_index(d.urn, '/', -{lvl})) - 1)
|
join dict_dataset d on s.dataset_urn = substring(d.urn, 1, char_length(d.urn) - char_length(substring_index(d.urn, '/', -{lvl})) - 1)
|
||||||
set s.is_parent_urn = 'Y'
|
set s.is_parent_urn = 'Y'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, args):
|
def __init__(self, args):
|
||||||
self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
|
self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
|
||||||
args[Constant.WH_DB_USERNAME_KEY],
|
args[Constant.WH_DB_USERNAME_KEY],
|
||||||
args[Constant.WH_DB_PASSWORD_KEY],
|
args[Constant.WH_DB_PASSWORD_KEY],
|
||||||
args[Constant.WH_DB_DRIVER_KEY])
|
args[Constant.WH_DB_DRIVER_KEY])
|
||||||
self.wh_cursor = self.wh_con.cursor()
|
self.wh_cursor = self.wh_con.cursor()
|
||||||
self.db_id = int(args[Constant.DB_ID_KEY])
|
self.db_id = int(args[Constant.DB_ID_KEY])
|
||||||
self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
|
self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
|
||||||
self.metadata_folder = self.app_folder + "/" + str(self.db_id)
|
self.metadata_folder = self.app_folder + "/" + str(self.db_id)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.read_file_to_stg()
|
try:
|
||||||
self.update_dataset_id()
|
self.read_file_to_stg()
|
||||||
self.update_database_id()
|
self.update_dataset_id()
|
||||||
self.update_app_id()
|
self.update_database_id()
|
||||||
self.update_owner_type()
|
self.update_app_id()
|
||||||
self.wh_cursor.close()
|
self.update_owner_type()
|
||||||
self.wh_con.close()
|
finally:
|
||||||
|
self.wh_cursor.close()
|
||||||
|
self.wh_con.close()
|
||||||
|
|
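The try/finally refactor guarantees the cursor and connection are released even when one of the steps raises, which is the point of this merge. An equivalent, slightly more compact spelling uses contextlib.closing; a sketch assuming the same zxJDBC objects, which expose close() (on Jython 2.5 this also needs from __future__ import with_statement):

from contextlib import closing

def run(self):
    with closing(self.wh_con):          # closed last
        with closing(self.wh_cursor):   # closed first
            self.read_file_to_stg()
            self.update_dataset_id()
            self.update_database_id()
            self.update_app_id()
            self.update_owner_type()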
||||||
def read_file_to_stg(self):
|
def read_file_to_stg(self):
|
||||||
t = self._tables["dataset_owner"]
|
t = self._tables["dataset_owner"]
|
||||||
|
|
||||||
# Clear staging table
|
# Clear staging table
|
||||||
query = self._clear_staging_template.format(table=t.get("table"))
|
query = self._clear_staging_template.format(table=t.get("table"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
# Load file into staging table
|
# Load file into staging table
|
||||||
query = self._read_file_template.format(folder=self.metadata_folder, file=t.get("file"), table=t.get("table"), columns=t.get("columns"))
|
query = self._read_file_template.format(folder=self.metadata_folder, file=t.get("file"), table=t.get("table"), columns=t.get("columns"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
def update_dataset_id(self):
|
def update_dataset_id(self):
|
||||||
t = self._tables["dataset_owner"]
|
t = self._tables["dataset_owner"]
|
||||||
query = self._update_dataset_id_template.format(table=t.get("table"))
|
query = self._update_dataset_id_template.format(table=t.get("table"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
def update_database_id(self):
|
def update_database_id(self):
|
||||||
t = self._tables["dataset_owner"]
|
t = self._tables["dataset_owner"]
|
||||||
query = self._update_database_id_template.format(table=t.get("table"))
|
query = self._update_database_id_template.format(table=t.get("table"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
def update_app_id(self):
|
def update_app_id(self):
|
||||||
t = self._tables["dataset_owner"]
|
t = self._tables["dataset_owner"]
|
||||||
query = self._update_app_id_template.format(table=t.get("table"))
|
query = self._update_app_id_template.format(table=t.get("table"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
query = self._update_group_app_id_template.format(table=t.get("table"))
|
query = self._update_group_app_id_template.format(table=t.get("table"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
def update_owner_type(self):
|
def update_owner_type(self):
|
||||||
t = self._tables["dataset_owner"]
|
t = self._tables["dataset_owner"]
|
||||||
query = self._update_owner_type_template.format(table=t.get("table"))
|
query = self._update_owner_type_template.format(table=t.get("table"))
|
||||||
print query
|
print query
|
||||||
self.wh_cursor.execute(query)
|
self.wh_cursor.execute(query)
|
||||||
self.wh_con.commit()
|
self.wh_con.commit()
|
||||||
|
|
||||||
|
def update_parent_flag(self):
|
||||||
|
t = self._tables["dataset_owner"]
|
||||||
|
for l in range(1, 6):
|
||||||
|
query = self._update_parent_flag.format(table=t.get("table"), lvl=l)
|
||||||
|
print query
|
||||||
|
self.wh_cursor.execute(query)
|
||||||
|
self.wh_con.commit()
|
||||||
|
|
||||||
def update_parent_flag(self):
|
|
||||||
t = self._tables["dataset_owner"]
|
|
||||||
for l in range(1, 6):
|
|
||||||
query = self._update_parent_flag.format(table=t.get("table"), lvl=l)
|
|
||||||
print query
|
|
||||||
self.wh_cursor.execute(query)
|
|
||||||
self.wh_con.commit()
|
|
||||||
|
|
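The new update_parent_flag pass is what gives the is_parent_urn = 'N' filter in OwnerLoad's unmatched insert its meaning: staging URNs that are merely ancestors (one to five levels up) of a registered dataset are flagged 'Y' so they are not parked as failures. Condensed, the pass is:

def flag_parent_urns(self):
    t = self._tables["dataset_owner"]
    # mark staging rows whose URN is a 1-5 level ancestor of a known dataset
    for lvl in range(1, 6):
        self.wh_cursor.execute(self._update_parent_flag.format(table=t.get("table"), lvl=lvl))
        self.wh_con.commit()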
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
props = sys.argv[1]
|
props = sys.argv[1]
|
||||||
ot = OwnerTransform(props)
|
ot = OwnerTransform(props)
|
||||||
ot.run()
|
ot.run()
|
||||||
|
@ -32,15 +32,17 @@ class SchedulerLoad:
|
|||||||
self.wh_cursor = self.wh_con.cursor()
|
self.wh_cursor = self.wh_con.cursor()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.load_flows()
|
try:
|
||||||
self.load_jobs()
|
self.load_flows()
|
||||||
self.load_flow_dags()
|
self.load_jobs()
|
||||||
self.load_flow_schedules()
|
self.load_flow_dags()
|
||||||
self.load_flow_owner_permissions()
|
self.load_flow_schedules()
|
||||||
self.load_flow_executions()
|
self.load_flow_owner_permissions()
|
||||||
self.load_job_executions()
|
self.load_flow_executions()
|
||||||
self.wh_cursor.close()
|
self.load_job_executions()
|
||||||
self.wh_con.close()
|
finally:
|
||||||
|
self.wh_cursor.close()
|
||||||
|
self.wh_con.close()
|
||||||
|
|
||||||
def load_flows(self):
|
def load_flows(self):
|
||||||
cmd = """
|
cmd = """
|
||||||
|
@ -84,6 +84,7 @@ class SchedulerTransform:
|
|||||||
self.metadata_folder = self.app_folder + "/" + str(scheduler_type) + "/" + str(self.app_id)
|
self.metadata_folder = self.app_folder + "/" + str(scheduler_type) + "/" + str(self.app_id)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
|
try:
|
||||||
self.read_flow_file_to_stg()
|
self.read_flow_file_to_stg()
|
||||||
self.read_job_file_to_stg()
|
self.read_job_file_to_stg()
|
||||||
self.read_dag_file_to_stg()
|
self.read_dag_file_to_stg()
|
||||||
@ -91,6 +92,7 @@ class SchedulerTransform:
|
|||||||
self.read_flow_schedule_file_to_stg()
|
self.read_flow_schedule_file_to_stg()
|
||||||
self.read_flow_exec_file_to_stg()
|
self.read_flow_exec_file_to_stg()
|
||||||
self.read_job_exec_file_to_stg()
|
self.read_job_exec_file_to_stg()
|
||||||
|
finally:
|
||||||
self.wh_cursor.close()
|
self.wh_cursor.close()
|
||||||
self.wh_con.close()
|
self.wh_con.close()
|
||||||
|
|
||||||
|
@ -538,14 +538,16 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
e = TeradataExtract()
|
e = TeradataExtract()
|
||||||
e.conn_td = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
|
e.conn_td = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
|
||||||
e.conn_td.cursor().execute("SET QUERY_BAND = 'script=%s; pid=%d; ' FOR SESSION;" % ('TeradataExtract.py', os.getpid()))
|
try:
|
||||||
e.conn_td.commit()
|
e.conn_td.cursor().execute("SET QUERY_BAND = 'script=%s; pid=%d; ' FOR SESSION;" % ('TeradataExtract.py', os.getpid()))
|
||||||
e.log_file = args[Constant.TD_LOG_KEY]
|
e.conn_td.commit()
|
||||||
e.databases = args[Constant.TD_TARGET_DATABASES_KEY].split(',')
|
e.log_file = args[Constant.TD_LOG_KEY]
|
||||||
e.default_database = args[Constant.TD_DEFAULT_DATABASE_KEY]
|
e.databases = args[Constant.TD_TARGET_DATABASES_KEY].split(',')
|
||||||
index_type = {'P': 'Primary Index', 'K': 'Primary Key', 'S': 'Secondary Index', 'Q': 'Partitioned Primary Index',
|
e.default_database = args[Constant.TD_DEFAULT_DATABASE_KEY]
|
||||||
'J': 'Join Index', 'U': 'Unique Index'}
|
index_type = {'P': 'Primary Index', 'K': 'Primary Key', 'S': 'Secondary Index', 'Q': 'Partitioned Primary Index',
|
||||||
|
'J': 'Join Index', 'U': 'Unique Index'}
|
||||||
|
|
||||||
e.run(None, None, args[Constant.TD_SCHEMA_OUTPUT_KEY], args[Constant.TD_SAMPLE_OUTPUT_KEY])
|
e.run(None, None, args[Constant.TD_SCHEMA_OUTPUT_KEY], args[Constant.TD_SAMPLE_OUTPUT_KEY])
|
||||||
e.conn_td.close()
|
finally:
|
||||||
|
e.conn_td.close()
|
||||||
|
|
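The Teradata change follows the same pattern as the jython scripts: the QUERY_BAND statement tags every query in the session with the script name and process id so DBAs can attribute load, and the new try/finally ensures conn_td.close() runs even if extraction fails midway. The tagging step boils down to (connection setup as above):

import os

def tag_session(conn_td):
    # label this session's queries for DBA-side attribution
    qb = "SET QUERY_BAND = 'script=%s; pid=%d; ' FOR SESSION;" % ('TeradataExtract.py', os.getpid())
    conn_td.cursor().execute(qb)
    conn_td.commit()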
||||||
|
@ -253,7 +253,9 @@ if __name__ == "__main__":
|
|||||||
l.db_id = args[Constant.DB_ID_KEY]
|
l.db_id = args[Constant.DB_ID_KEY]
|
||||||
l.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
|
l.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
|
||||||
l.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
|
l.conn_mysql = zxJDBC.connect(JDBC_URL, username, password, JDBC_DRIVER)
|
||||||
l.load_metadata()
|
try:
|
||||||
l.load_field()
|
l.load_metadata()
|
||||||
l.load_sample()
|
l.load_field()
|
||||||
l.conn_mysql.close()
|
l.load_sample()
|
||||||
|
finally:
|
||||||
|
l.conn_mysql.close()
|
||||||
|
@ -17,8 +17,10 @@ import java.io.File;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.eclipse.jgit.api.Git;
|
import org.eclipse.jgit.api.Git;
|
||||||
import org.eclipse.jgit.api.errors.GitAPIException;
|
import org.eclipse.jgit.api.errors.GitAPIException;
|
||||||
@ -80,15 +82,18 @@ public class GitUtil {
|
|||||||
* @return List of path of repositories e.g. project/repo
|
* @return Map of repository path to clone URL, e.g. project/repo
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static List<String> getRepoListFromProject(String projectUrl) throws IOException {
|
public static Map<String, String> getRepoListFromProject(String projectUrl) throws IOException {
|
||||||
|
|
||||||
List<String> repoList = new LinkedList<>();
|
Map<String, String> repoList = new HashMap<>();
|
||||||
Document doc = Jsoup.connect(projectUrl).get();
|
Document doc = Jsoup.connect(projectUrl).data("format", "xml").get();
|
||||||
Elements repos = doc.getElementsByClass("repository");
|
Elements repos = doc.getElementsByTag("repositories");
|
||||||
|
Elements mainlines = repos.first().getElementsByTag("mainlines");
|
||||||
|
Elements repo = mainlines.first().getElementsByTag("repository");
|
||||||
|
|
||||||
for (Element e : repos) {
|
for (Element e : repo) {
|
||||||
String repo = e.children().first().text();
|
String repoName = e.getElementsByTag("name").first().text();
|
||||||
repoList.add(repo.trim());
|
String repoUrl = e.getElementsByTag("clone_url").first().text();
|
||||||
|
repoList.put(repoName.trim(), repoUrl.trim());
|
||||||
}
|
}
|
||||||
|
|
||||||
return repoList;
|
return repoList;
|
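The crawler change swaps HTML scraping for Gitorious's XML project listing: data("format", "xml") requests the XML rendering, and repository names and clone URLs are read from <repositories><mainlines><repository> elements, returned as a name-to-URL map. The element names below are inferred from the Jsoup selectors in the code and the payload is illustrative; a Python sketch of the same parse:

import xml.etree.ElementTree as ET

payload = """<repositories><mainlines>
  <repository><name>wherehows</name>
    <clone_url>https://git.example.com/project/wherehows.git</clone_url>
  </repository>
</mainlines></repositories>"""

repos = {}
for repo in ET.fromstring(payload).find('mainlines').findall('repository'):
    repos[repo.findtext('name').strip()] = repo.findtext('clone_url').strip()
# repos == {'wherehows': 'https://git.example.com/project/wherehows.git'}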
||||||
|
@ -29,7 +29,7 @@ public class GitUtilTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testGetRepoListFromProject()
|
public void testGetRepoListFromProject()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
//List<String> repos = GitUtil.getRepoListFromProject("git://git.example.com/project");
|
//Map<String, String> repos = GitUtil.getRepoListFromProject("git://git.example.com/project");
|
||||||
//Assert.assertTrue(repos.size() > 0);
|
//Assert.assertTrue(repos.size() > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|