Use WH_APP_FOLDER property as the base directory for temp files for various ETL jobs (#451)

* Use WH_APP_FOLDER property as the base directory for temp files in Oracle ETL, instead of the full path defined in wh_etl_job_property, which is often /var/tmp/{something}.

* Move common code to FileUtil.py and move Voldemort's temp files too.

* Move Kafka ETL temp files.

* Move Espresso ETL temp files.

* Move Multiproduct ETL temp files.

* Move CodeSearch ETL temp files.

* Move Teradata ETL temp files.
This commit is contained in:
Mars Lan 2017-04-25 15:11:02 -07:00
parent 27d12973e0
commit 35c0781f59
23 changed files with 211 additions and 92 deletions

View File

@ -12,14 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys,os,re import os
import re
import requests import requests
import subprocess import subprocess
import sys
from wherehows.common import Constant from wherehows.common import Constant
from wherehows.common.schemas import SCMOwnerRecord from wherehows.common.schemas import SCMOwnerRecord
from wherehows.common.writers import FileWriter from wherehows.common.writers import FileWriter
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class CodeSearchExtract: class CodeSearchExtract:
""" """
@ -33,10 +38,13 @@ class CodeSearchExtract:
# limit_multiproduct = None # limit_multiproduct = None
# limit_plugin = None # limit_plugin = None
def __init__(self): def __init__(self, args):
self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__) self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
self.base_url = args[Constant.BASE_URL_KEY] self.base_url = args[Constant.BASE_URL_KEY]
self.code_search_committer_writer = FileWriter(args[Constant.DATABASE_SCM_REPO_OUTPUT_KEY])
temp_dir = FileUtil.etl_temp_dir(args, "CODESEARCH")
self.code_search_committer_writer = FileWriter(
os.path.join(temp_dir, args[Constant.DATABASE_SCM_REPO_OUTPUT_KEY]))
def run(self): def run(self):
offset_min = 1 offset_min = 1
@ -191,5 +199,5 @@ class CodeSearchExtract:
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
e = CodeSearchExtract() e = CodeSearchExtract(args)
e.run() e.run()

View File

@ -12,10 +12,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import datetime
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import sys, os, datetime
import FileUtil
class CodeSearchLoad: class CodeSearchLoad:
@ -26,7 +30,9 @@ class CodeSearchLoad:
password = args[Constant.WH_DB_PASSWORD_KEY] password = args[Constant.WH_DB_PASSWORD_KEY]
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
self.database_scm_repo_file = args[Constant.DATABASE_SCM_REPO_OUTPUT_KEY]
temp_dir = FileUtil.etl_temp_dir(args, "CODESEARCH")
self.database_scm_repo_file = os.path.join(temp_dir, args[Constant.DATABASE_SCM_REPO_OUTPUT_KEY])
self.app_id = args[Constant.APP_ID_KEY] self.app_id = args[Constant.APP_ID_KEY]
self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY] self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]

View File

@ -12,20 +12,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, json import json
import os
import sys
from datetime import datetime from datetime import datetime
from jython import requests from jython import requests
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class EspressoExtract: class EspressoExtract:
def __init__(self): def __init__(self, args):
self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__) self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
requests.packages.urllib3.disable_warnings() requests.packages.urllib3.disable_warnings()
self.output_file = open(args[Constant.ESPRESSO_OUTPUT_KEY], 'w') temp_dir = FileUtil.etl_temp_dir(args, "ESPRESSO")
self.output_file = open(os.path.join(temp_dir, args[Constant.ESPRESSO_OUTPUT_KEY]), 'w')
self.d2_proxys = [] self.d2_proxys = []
proxy_urls = [x.strip() for x in args[Constant.D2_PROXY_URL].split(',')] proxy_urls = [x.strip() for x in args[Constant.D2_PROXY_URL].split(',')]
@ -111,5 +117,5 @@ class EspressoExtract:
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
e = EspressoExtract() e = EspressoExtract(args)
e.run() e.run()

View File

@ -12,11 +12,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, datetime, json import datetime
import json
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class EspressoTransform: class EspressoTransform:
@ -28,7 +34,8 @@ class EspressoTransform:
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
self.input_file = open(args[Constant.ESPRESSO_OUTPUT_KEY], 'r') temp_dir = FileUtil.etl_temp_dir(args, "ESPRESSO")
self.input_file = open(os.path.join(temp_dir, args[Constant.ESPRESSO_OUTPUT_KEY]), 'r')
self.db_id = args[Constant.DB_ID_KEY] self.db_id = args[Constant.DB_ID_KEY]
self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY] self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]

View File

@ -0,0 +1,26 @@
#
# Copyright 2015 LinkedIn Corp. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
import os
from wherehows.common import Constant
def etl_temp_dir(args, etl_type):
  """Return the temp directory for one ETL run, creating it if needed.

  The directory is <WH_APP_FOLDER>/<etl_type>/<wh_exec_id>, so each ETL type
  and each execution gets an isolated scratch area under the app folder.

  :param args: job property dict; must contain Constant.WH_APP_FOLDER_KEY
    and Constant.WH_EXEC_ID_KEY
  :param etl_type: ETL family name used as a subfolder, e.g. "ORACLE"
  :return: absolute path of the (now existing) temp directory
  """
  # Avoid naming the local 'dir' -- that shadows the builtin.
  temp_dir = os.path.join(args[Constant.WH_APP_FOLDER_KEY], etl_type, args[Constant.WH_EXEC_ID_KEY])
  # EAFP: create unconditionally and tolerate "already exists" so two jobs
  # racing on the same exec id don't crash (exists()+makedirs() is racy).
  try:
    os.makedirs(temp_dir)
  except OSError:
    if not os.path.isdir(temp_dir):
      raise
  return temp_dir

View File

@ -12,20 +12,27 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, json, re import json
import re
import os
import sys
from datetime import datetime from datetime import datetime
from jython import requests from jython import requests
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class KafkaExtract: class KafkaExtract:
def __init__(self): def __init__(self, args):
self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__) self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
requests.packages.urllib3.disable_warnings() requests.packages.urllib3.disable_warnings()
self.output_file = open(args[Constant.KAFKA_OUTPUT_KEY], 'w') temp_dir = FileUtil.etl_temp_dir(args, "KAFKA")
self.output_file = open(os.path.join(temp_dir, args[Constant.KAFKA_OUTPUT_KEY]), 'w')
self.d2_proxys = [] self.d2_proxys = []
proxy_urls = [x.strip() for x in args[Constant.D2_PROXY_URL].split(',')] proxy_urls = [x.strip() for x in args[Constant.D2_PROXY_URL].split(',')]
@ -112,5 +119,5 @@ class KafkaExtract:
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
e = KafkaExtract() e = KafkaExtract(args)
e.run() e.run()

View File

@ -12,11 +12,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, datetime, json import datetime
import json
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class KafkaTransform: class KafkaTransform:
@ -28,7 +33,8 @@ class KafkaTransform:
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
self.input_file = open(args[Constant.KAFKA_OUTPUT_KEY], 'r') temp_dir = FileUtil.etl_temp_dir(args, "KAFKA")
self.input_file = open(os.path.join(temp_dir, args[Constant.KAFKA_OUTPUT_KEY]), 'r')
self.db_id = args[Constant.DB_ID_KEY] self.db_id = args[Constant.DB_ID_KEY]
self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY] self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]

View File

@ -12,9 +12,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, os, re import os
import re
import sys
import datetime import datetime
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from jython import requests from jython import requests
from wherehows.common import Constant from wherehows.common import Constant
from wherehows.common.schemas import MultiproductProjectRecord from wherehows.common.schemas import MultiproductProjectRecord
@ -23,17 +26,21 @@ from wherehows.common.schemas import MultiproductRepoOwnerRecord
from wherehows.common.writers import FileWriter from wherehows.common.writers import FileWriter
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class MultiproductLoad: class MultiproductLoad:
def __init__(self): def __init__(self, args):
self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__) self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
requests.packages.urllib3.disable_warnings() requests.packages.urllib3.disable_warnings()
self.app_id = int(args[Constant.APP_ID_KEY]) self.app_id = int(args[Constant.APP_ID_KEY])
self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY]) self.wh_exec_id = long(args[Constant.WH_EXEC_ID_KEY])
self.project_writer = FileWriter(args[Constant.GIT_PROJECT_OUTPUT_KEY])
self.repo_writer = FileWriter(args[Constant.PRODUCT_REPO_OUTPUT_KEY]) temp_dir = FileUtil.etl_temp_dir(args, "MULTIPRODUCT")
self.repo_owner_writer = FileWriter(args[Constant.PRODUCT_REPO_OWNER_OUTPUT_KEY]) self.project_writer = FileWriter(os.path.join(temp_dir, args[Constant.GIT_PROJECT_OUTPUT_KEY]))
self.repo_writer = FileWriter(os.path.join(temp_dir, args[Constant.PRODUCT_REPO_OUTPUT_KEY]))
self.repo_owner_writer = FileWriter(os.path.join(temp_dir, args[Constant.PRODUCT_REPO_OWNER_OUTPUT_KEY]))
self.multiproduct = {} self.multiproduct = {}
self.git_repo = {} self.git_repo = {}
@ -303,5 +310,5 @@ class MultiproductLoad:
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
e = MultiproductLoad() e = MultiproductLoad(args)
e.run() e.run()

View File

@ -12,10 +12,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import datetime
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import sys, os, datetime
import FileUtil
class MultiproductLoad: class MultiproductLoad:
@ -26,9 +31,11 @@ class MultiproductLoad:
password = args[Constant.WH_DB_PASSWORD_KEY] password = args[Constant.WH_DB_PASSWORD_KEY]
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
self.mp_gitli_project_file = args[Constant.GIT_PROJECT_OUTPUT_KEY]
self.product_repo_file = args[Constant.PRODUCT_REPO_OUTPUT_KEY] temp_dir = FileUtil.etl_temp_dir(args, "MULTIPRODUCT")
self.product_repo_owner_file = args[Constant.PRODUCT_REPO_OWNER_OUTPUT_KEY] self.mp_gitli_project_file = os.path.join(temp_dir, args[Constant.GIT_PROJECT_OUTPUT_KEY])
self.product_repo_file = os.path.join(temp_dir, args[Constant.PRODUCT_REPO_OUTPUT_KEY])
self.product_repo_owner_file = os.path.join(temp_dir, args[Constant.PRODUCT_REPO_OWNER_OUTPUT_KEY])
self.app_id = args[Constant.APP_ID_KEY] self.app_id = args[Constant.APP_ID_KEY]
self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY] self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]

View File

@ -12,15 +12,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
from com.ziclix.python.sql import zxJDBC import csv
import sys, os, re
import json, csv
import datetime import datetime
import json
import os
import sys
from com.ziclix.python.sql import zxJDBC
from wherehows.common.schemas import SampleDataRecord from wherehows.common.schemas import SampleDataRecord
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
from wherehows.common.writers import FileWriter from wherehows.common.writers import FileWriter
import FileUtil
class OracleExtract: class OracleExtract:
table_dict = {} table_dict = {}
@ -427,6 +432,11 @@ if __name__ == "__main__":
collect_sample = bool(args[Constant.ORA_LOAD_SAMPLE]) collect_sample = bool(args[Constant.ORA_LOAD_SAMPLE])
e.databases = args[Constant.ORA_EXCLUDE_DATABASES_KEY].split(',') e.databases = args[Constant.ORA_EXCLUDE_DATABASES_KEY].split(',')
temp_dir = FileUtil.etl_temp_dir(args, "ORACLE");
table_output_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
field_output_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
sample_output_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])
try: try:
e.conn_db.cursor().execute("ALTER SESSION SET TIME_ZONE = 'US/Pacific'") e.conn_db.cursor().execute("ALTER SESSION SET TIME_ZONE = 'US/Pacific'")
e.conn_db.cursor().execute("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS'") e.conn_db.cursor().execute("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD HH24:MI:SS'")
@ -434,10 +444,11 @@ if __name__ == "__main__":
('WhereHows (Jython)', os.getpid())) ('WhereHows (Jython)', os.getpid()))
e.conn_db.commit() e.conn_db.commit()
e.run(None, None, e.run(None,
args[Constant.ORA_SCHEMA_OUTPUT_KEY], None,
args[Constant.ORA_FIELD_OUTPUT_KEY], table_output_file,
args[Constant.ORA_SAMPLE_OUTPUT_KEY], field_output_file,
sample_output_file,
sample=collect_sample) sample=collect_sample)
finally: finally:
e.conn_db.cursor().close() e.conn_db.cursor().close()

View File

@ -12,10 +12,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import datetime
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import sys, datetime
import FileUtil
class OracleLoad: class OracleLoad:
@ -26,9 +31,6 @@ class OracleLoad:
password = args[Constant.WH_DB_PASSWORD_KEY] password = args[Constant.WH_DB_PASSWORD_KEY]
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
self.input_table_file = args[Constant.ORA_SCHEMA_OUTPUT_KEY]
self.input_field_file = args[Constant.ORA_FIELD_OUTPUT_KEY]
self.input_sample_file = args[Constant.ORA_SAMPLE_OUTPUT_KEY]
self.db_id = args[Constant.DB_ID_KEY] self.db_id = args[Constant.DB_ID_KEY]
self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY] self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]
@ -39,6 +41,11 @@ class OracleLoad:
lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT] lock_wait_time = args[Constant.INNODB_LOCK_WAIT_TIMEOUT]
self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time) self.conn_cursor.execute("SET innodb_lock_wait_timeout = %s;" % lock_wait_time)
temp_dir = FileUtil.etl_temp_dir(args, "ORACLE");
self.input_table_file = os.path.join(temp_dir, args[Constant.ORA_SCHEMA_OUTPUT_KEY])
self.input_field_file = os.path.join(temp_dir, args[Constant.ORA_FIELD_OUTPUT_KEY])
self.input_sample_file = os.path.join(temp_dir, args[Constant.ORA_SAMPLE_OUTPUT_KEY])
self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}" self.logger.info("Load Oracle Metadata into {}, db_id {}, wh_exec_id {}"
.format(JDBC_URL, self.db_id, self.wh_etl_exec_id)) .format(JDBC_URL, self.db_id, self.wh_etl_exec_id))

View File

@ -12,15 +12,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
from com.ziclix.python.sql import zxJDBC
import sys, os, re, json
import datetime import datetime
import json
import os
import re
import sys
from com.ziclix.python.sql import zxJDBC
from distutils.util import strtobool from distutils.util import strtobool
from wherehows.common.schemas import SampleDataRecord from wherehows.common.schemas import SampleDataRecord
from wherehows.common.writers import FileWriter from wherehows.common.writers import FileWriter
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class TeradataExtract: class TeradataExtract:
def __init__(self): def __init__(self):
@ -555,17 +561,22 @@ if __name__ == "__main__":
if datetime.datetime.now().strftime('%a') not in args[Constant.TD_COLLECT_SAMPLE_DATA_DAYS]: if datetime.datetime.now().strftime('%a') not in args[Constant.TD_COLLECT_SAMPLE_DATA_DAYS]:
do_sample = False do_sample = False
temp_dir = FileUtil.etl_temp_dir(args, "TERADATA")
try: try:
e.conn_td.cursor().execute( e.conn_td.cursor().execute(
"SET QUERY_BAND = 'script=%s; pid=%d; ' FOR SESSION;" % ('TeradataExtract.py', os.getpid())) "SET QUERY_BAND = 'script=%s; pid=%d; ' FOR SESSION;" % ('TeradataExtract.py', os.getpid()))
e.conn_td.commit() e.conn_td.commit()
e.log_file = args[Constant.TD_LOG_KEY] e.log_file = os.path.join(temp_dir, args[Constant.TD_LOG_KEY])
e.databases = args[Constant.TD_TARGET_DATABASES_KEY].split(',') e.databases = args[Constant.TD_TARGET_DATABASES_KEY].split(',')
e.default_database = args[Constant.TD_DEFAULT_DATABASE_KEY] e.default_database = args[Constant.TD_DEFAULT_DATABASE_KEY]
index_type = {'P': 'Primary Index', 'K': 'Primary Key', 'S': 'Secondary Index', 'Q': 'Partitioned Primary Index', index_type = {'P': 'Primary Index', 'K': 'Primary Key', 'S': 'Secondary Index', 'Q': 'Partitioned Primary Index',
'J': 'Join Index', 'U': 'Unique Index'} 'J': 'Join Index', 'U': 'Unique Index'}
e.run(None, None, args[Constant.TD_SCHEMA_OUTPUT_KEY], args[Constant.TD_SAMPLE_OUTPUT_KEY], sample=do_sample) schema_output_file = os.path.join(temp_dir, args[Constant.TD_SCHEMA_OUTPUT_KEY])
sample_output_file = os.path.join(temp_dir, args[Constant.TD_SAMPLE_OUTPUT_KEY])
e.run(None, None, schema_output_file, sample_output_file, sample=do_sample)
finally: finally:
e.conn_td.close() e.conn_td.close()

View File

@ -12,12 +12,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, datetime import datetime
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from distutils.util import strtobool from distutils.util import strtobool
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class TeradataLoad: class TeradataLoad:
def __init__(self): def __init__(self):
@ -404,9 +409,10 @@ if __name__ == "__main__":
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
l.input_file = args[Constant.TD_METADATA_KEY] temp_dir = FileUtil.etl_temp_dir(args, "TERADATA")
l.input_field_file = args[Constant.TD_FIELD_METADATA_KEY] l.input_file = os.path.join(temp_dir, args[Constant.TD_METADATA_KEY])
l.input_sampledata_file = args[Constant.TD_SAMPLE_OUTPUT_KEY] l.input_field_file = os.path.join(temp_dir, args[Constant.TD_FIELD_METADATA_KEY])
l.input_sampledata_file = os.path.join(temp_dir, args[Constant.TD_SAMPLE_OUTPUT_KEY])
do_sample = False do_sample = False
if Constant.TD_LOAD_SAMPLE in args: if Constant.TD_LOAD_SAMPLE in args:

View File

@ -12,15 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import json
import datetime import datetime
import sys, os import json
import os
import sys
import time import time
from wherehows.common.writers import FileWriter from wherehows.common.writers import FileWriter
from wherehows.common.schemas import DatasetSchemaRecord, DatasetFieldRecord from wherehows.common.schemas import DatasetSchemaRecord, DatasetFieldRecord
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class TeradataTransform: class TeradataTransform:
def __init__(self): def __init__(self):
@ -132,5 +136,10 @@ if __name__ == "__main__":
t = TeradataTransform() t = TeradataTransform()
t.log_file = args['teradata.log'] t.log_file = args['teradata.log']
t.transform(args[Constant.TD_SCHEMA_OUTPUT_KEY], args[Constant.TD_METADATA_KEY], args[Constant.TD_FIELD_METADATA_KEY]) temp_dir = FileUtil.etl_temp_dir(args, "TERADATA")
input = os.path.join(temp_dir, args[Constant.TD_SCHEMA_OUTPUT_KEY])
td_metadata = os.path.join(temp_dir, args[Constant.TD_METADATA_KEY])
td_field_metadata = os.path.join(temp_dir, args[Constant.TD_FIELD_METADATA_KEY])
t.transform(input, td_metadata, td_field_metadata)

View File

@ -12,20 +12,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, json import json
import os
import sys
from datetime import datetime from datetime import datetime
from jython import requests from jython import requests
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class VoldemortExtract: class VoldemortExtract:
def __init__(self): def __init__(self, args):
self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__) self.logger = LoggerFactory.getLogger('jython script : ' + self.__class__.__name__)
requests.packages.urllib3.disable_warnings() requests.packages.urllib3.disable_warnings()
self.output_file = open(args[Constant.VOLDEMORT_OUTPUT_KEY], 'w') temp_dir = FileUtil.etl_temp_dir(args, "VOLDEMORT")
self.output_file = open(os.path.join(temp_dir, args[Constant.VOLDEMORT_OUTPUT_KEY]), 'w')
self.d2_proxys = [] self.d2_proxys = []
proxy_urls = [x.strip() for x in args[Constant.D2_PROXY_URL].split(',')] proxy_urls = [x.strip() for x in args[Constant.D2_PROXY_URL].split(',')]
@ -117,5 +123,5 @@ class VoldemortExtract:
if __name__ == "__main__": if __name__ == "__main__":
args = sys.argv[1] args = sys.argv[1]
e = VoldemortExtract() e = VoldemortExtract(args)
e.run() e.run()

View File

@ -12,11 +12,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# #
import sys, datetime, json import datetime
import json
import os
import sys
from com.ziclix.python.sql import zxJDBC from com.ziclix.python.sql import zxJDBC
from wherehows.common import Constant from wherehows.common import Constant
from org.slf4j import LoggerFactory from org.slf4j import LoggerFactory
import FileUtil
class VoldemortTransform: class VoldemortTransform:
@ -28,7 +34,8 @@ class VoldemortTransform:
JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY] JDBC_DRIVER = args[Constant.WH_DB_DRIVER_KEY]
JDBC_URL = args[Constant.WH_DB_URL_KEY] JDBC_URL = args[Constant.WH_DB_URL_KEY]
self.input_file = open(args[Constant.VOLDEMORT_OUTPUT_KEY], 'r') temp_dir = FileUtil.etl_temp_dir(args, "VOLDEMORT")
self.input_file = open(os.path.join(temp_dir, args[Constant.VOLDEMORT_OUTPUT_KEY]), 'r')
self.db_id = args[Constant.DB_ID_KEY] self.db_id = args[Constant.DB_ID_KEY]
self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY] self.wh_etl_exec_id = args[Constant.WH_EXEC_ID_KEY]

View File

@ -26,22 +26,19 @@ public class EspressoMetadataEtlTest {
_etl = new EspressoMetadataEtl(70, 0L); _etl = new EspressoMetadataEtl(70, 0L);
} }
@Test private void extractTest()
public void extractTest()
throws Exception { throws Exception {
_etl.extract(); _etl.extract();
// check file // check file
} }
@Test private void transformTest()
public void transformTest()
throws Exception { throws Exception {
_etl.transform(); _etl.transform();
// check staging tables in database // check staging tables in database
} }
@Test private void loadTest()
public void loadTest()
throws Exception { throws Exception {
_etl.load(); _etl.load();
// check final tables in database // check final tables in database

View File

@ -26,22 +26,19 @@ public class KafkaMetadataEtlTest {
_etl = new KafkaMetadataEtl(200, 0L); _etl = new KafkaMetadataEtl(200, 0L);
} }
@Test private void extractTest()
public void extractTest()
throws Exception { throws Exception {
_etl.extract(); _etl.extract();
// check file // check file
} }
@Test private void transformTest()
public void transformTest()
throws Exception { throws Exception {
_etl.transform(); _etl.transform();
// check staging tables in database // check staging tables in database
} }
@Test private void loadTest()
public void loadTest()
throws Exception { throws Exception {
_etl.load(); _etl.load();
// check final tables in database // check final tables in database

View File

@ -26,15 +26,13 @@ public class OracleMetadataEtlTest {
_etl = new OracleMetadataEtl(80, 0L); _etl = new OracleMetadataEtl(80, 0L);
} }
@Test private void extractTest()
public void extractTest()
throws Exception { throws Exception {
_etl.extract(); _etl.extract();
// check the csv file // check the csv file
} }
@Test private void loadTest()
public void loadTest()
throws Exception { throws Exception {
_etl.load(); _etl.load();
// check in database // check in database

View File

@ -28,21 +28,18 @@ public class TeradataMetadataEtlTest {
t.run(); t.run();
} }
@Test(groups = {"needConfig"})
public void testExtract() public void testExtract()
throws Exception { throws Exception {
TeradataMetadataEtl t = new TeradataMetadataEtl(3, 0L); TeradataMetadataEtl t = new TeradataMetadataEtl(3, 0L);
t.extract(); t.extract();
} }
@Test(groups = {"needConfig"})
public void testTransform() public void testTransform()
throws Exception { throws Exception {
TeradataMetadataEtl t = new TeradataMetadataEtl(3, 0L); TeradataMetadataEtl t = new TeradataMetadataEtl(3, 0L);
t.transform(); t.transform();
} }
@Test(groups = {"needConfig"})
public void testLoad() public void testLoad()
throws Exception { throws Exception {
TeradataMetadataEtl t = new TeradataMetadataEtl(3, 0L); TeradataMetadataEtl t = new TeradataMetadataEtl(3, 0L);

View File

@ -26,22 +26,19 @@ public class VoldemortMetadataEtlTest {
_etl = new VoldemortMetadataEtl(50, 0L); _etl = new VoldemortMetadataEtl(50, 0L);
} }
@Test private void extractTest()
public void extractTest()
throws Exception { throws Exception {
_etl.extract(); _etl.extract();
// check file // check file
} }
@Test private void transformTest()
public void transformTest()
throws Exception { throws Exception {
_etl.transform(); _etl.transform();
// check staging tables in database // check staging tables in database
} }
@Test private void loadTest()
public void loadTest()
throws Exception { throws Exception {
_etl.load(); _etl.load();
// check final tables in database // check final tables in database

View File

@ -26,15 +26,13 @@ public class CodeSearchMetadataEtlTest {
_etl = new CodeSearchMetadataEtl(800, 0L); _etl = new CodeSearchMetadataEtl(800, 0L);
} }
@Test private void extractTest()
public void extractTest()
throws Exception { throws Exception {
_etl.extract(); _etl.extract();
// check the csv file // check the csv file
} }
@Test private void loadTest()
public void loadTest()
throws Exception { throws Exception {
_etl.load(); _etl.load();
// check in database // check in database

View File

@ -26,15 +26,13 @@ public class MultiproductMetadataEtlTest {
_etl = new MultiproductMetadataEtl(500, 0L); _etl = new MultiproductMetadataEtl(500, 0L);
} }
@Test private void extractTest()
public void extractTest()
throws Exception { throws Exception {
_etl.extract(); _etl.extract();
// check the csv file // check the csv file
} }
@Test private void loadTest()
public void loadTest()
throws Exception { throws Exception {
_etl.load(); _etl.load();
// check in database // check in database