Merge pull request #85 from SunZhaonan/master

Enable Travis CI
This commit is contained in:
Zhaonan Sun 2016-03-15 16:43:04 -07:00
commit 0401cdf31d
7 changed files with 58 additions and 34 deletions

15
.travis.yml Normal file
View File

@ -0,0 +1,15 @@
language: java
jdk:
- oraclejdk8
before_install:
# download play 2.2.4
- wget http://downloads.typesafe.com/play/2.2.4/play-2.2.4.zip
- unzip play-2.2.4.zip && rm play-2.2.4.zip && mv play-2.2.4 $HOME/
- export PLAY_HOME="$HOME/play-2.2.4"
- echo $PLAY_HOME
# change the build file stack size
- sed -i -e 's/-Xss1M/-Xss2M/g' $PLAY_HOME/framework/build
- cat $PLAY_HOME/framework/build

View File

@ -75,11 +75,11 @@ idea {
}
}
task "build" (type: Exec, dependsOn: playCompile) {
task "build" (type: Exec, dependsOn: playCompile, overwrite: true) {
commandLine playExec, 'stage'
}
task "dist" (type: Exec) {
task "dist" (type: Exec, overwrite: true) {
commandLine playExec, 'dist'
}

View File

@ -48,8 +48,8 @@ subprojects {
}
}
ext.externalDependency = [//"mysql" : "mysql:mysql-connector-java:5.1.36",
//"jython" : "org.python:jython-standalone:2.7.0",
ext.externalDependency = ["mysql" : "mysql:mysql-connector-java:5.1.36",
"jython" : "org.python:jython-standalone:2.7.0",
"testng" : "org.testng:testng:6.9.6",
"hadoop_common" : "org.apache.hadoop:hadoop-common:2.7.1",
"hadoop_client" : "org.apache.hadoop:hadoop-mapreduce-client-core:2.7.1",

View File

@ -25,10 +25,10 @@ dependencies {
compile externalDependency.hive_exec
compile files("extralibs/terajdbc4-15.00.00.20.jar")
compile files("extralibs/tdgssconfig-15.00.00.20.jar")
// compile externalDependency.jython
// compile externalDependency.mysql
compile files("extralibs/mysql-connector-java-5.1.36.jar") // externalDependency.mysql
compile files("extralibs/jython-standalone-2.7.0.jar") //externalDependency.jython
compile externalDependency.jython
compile externalDependency.mysql
// compile files("extralibs/mysql-connector-java-5.1.36.jar")
// compile files("extralibs/jython-standalone-2.7.0.jar")
provided project(":hadoop-dataset-extractor-standalone")
testCompile externalDependency.testng
}

View File

@ -1,6 +1,4 @@
Please get the extra library files, which may not be available in Maven/TypeSafe repository or Artifactory, and put them here. For example:
* https://downloads.teradata.com/download/connectivity/jdbc-driver
* http://dev.mysql.com/downloads/connector/j
* http://download.oracle.com/otn/utilities_drivers/jdbc/121010/ojdbc7.jar
* http://www.jython.org/downloads.html

View File

@ -463,7 +463,7 @@ class TeradataExtract:
data_with_column = map(lambda x:dict(zip(columns, x)), rows_data)
return ref_urn, json.dumps({'sample': data_with_column})
def run(self, database_name, table_name, schema_output_file, sample_output_file):
def run(self, database_name, table_name, schema_output_file, sample_output_file, sample=True):
"""
The entrance of the class, extract schema and sample data
Note that the databases need to be ordered so that databases with more info (e.g. DWH_STG) are scanned first.
@ -480,9 +480,6 @@ class TeradataExtract:
schema_json = open(schema_output_file, 'wb')
os.chmod(schema_output_file, 0666)
open(sample_output_file, 'wb')
os.chmod(sample_output_file, 0666)
sample_file_writer = FileWriter(sample_output_file)
if database_name is None and table_name is None: # default route: process everything
for database_name in self.databases:
@ -506,25 +503,31 @@ class TeradataExtract:
f_log.write("Get view info %12s [%s -> %s]\n" % (database_name, str(begin), str(end)))
scaned_dict = {} # a cache of {name : {urn : _, data : _}} to avoid repeated computation
# collect sample data
for onedatabase in schema:
database_name = onedatabase['database']
if 'tables' in onedatabase:
alltables = onedatabase['tables']
else:
alltables = onedatabase['views']
for onetable in alltables:
table_name = onetable['original_name'].split('.')[1]
if table_name in scaned_dict:
sample_record = SampleDataRecord('teradata', '/' + database_name + '/' + table_name,
scaned_dict[table_name]['ref_urn'], scaned_dict[table_name]['data'])
if sample:
open(sample_output_file, 'wb')
os.chmod(sample_output_file, 0666)
sample_file_writer = FileWriter(sample_output_file)
# collect sample data
for onedatabase in schema:
database_name = onedatabase['database']
if 'tables' in onedatabase:
alltables = onedatabase['tables']
else:
(ref_urn, sample_data) = self.get_sample_data(database_name, table_name)
sample_record = SampleDataRecord('teradata', '/' + database_name + '/' + table_name, '', sample_data)
scaned_dict[table_name] = {'ref_urn': ref_urn, 'data': sample_data}
sample_file_writer.append(sample_record)
sample_file_writer.close()
alltables = onedatabase['views']
for onetable in alltables:
table_name = onetable['original_name'].split('.')[1]
if table_name in scaned_dict:
sample_record = SampleDataRecord('teradata', '/' + database_name + '/' + table_name,
scaned_dict[table_name]['ref_urn'], scaned_dict[table_name]['data'])
else:
(ref_urn, sample_data) = self.get_sample_data(database_name, table_name)
sample_record = SampleDataRecord('teradata', '/' + database_name + '/' + table_name, '', sample_data)
scaned_dict[table_name] = {'ref_urn': ref_urn, 'data': sample_data}
sample_file_writer.append(sample_record)
sample_file_writer.close()
# print 'byte size of schema : ' + str(sys.getsizeof(schema))
schema_json.write(json.dumps(schema, indent=None) + '\n')
@ -554,7 +557,7 @@ if __name__ == "__main__":
index_type = {'P': 'Primary Index', 'K': 'Primary Key', 'S': 'Secondary Index', 'Q': 'Partitioned Primary Index',
'J': 'Join Index', 'U': 'Unique Index'}
e.run(None, None, args[Constant.TD_SCHEMA_OUTPUT_KEY], args[Constant.TD_SAMPLE_OUTPUT_KEY])
e.run(None, None, args[Constant.TD_SCHEMA_OUTPUT_KEY], args[Constant.TD_SAMPLE_OUTPUT_KEY], sample=False)
finally:
e.conn_td.close()

View File

@ -86,15 +86,23 @@ idea {
}
}
task "build" (type: Exec, dependsOn: playClean) {
// delegate gradle java task to play command
task "build" (type: Exec, dependsOn: playClean, overwrite: true) {
commandLine playExec, 'stage'
}
task "assemble" (type: Exec, dependsOn: playClean, overwrite: true) {
commandLine playExec, 'stage'
}
task "dist" (type: Exec) {
task "dist" (type: Exec, overwrite: true) {
commandLine playExec, 'dist'
}
task "check" (overwrite: true) {
// skip gradle check of this repository
}
/*
// optional: if using 'eclipse' plugin
eclipse {