From af04ff6efc8459c56325ea337bc5396301ecd546 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Fri, 11 Dec 2015 11:02:29 -0800 Subject: [PATCH] add git file commit history etl --- NOTICE | 8 + build.gradle | 2 + data-model/DDL/ETL_DDL/git_metadata.sql | 38 +++ .../java/metadata/etl/git/GitMetadataEtl.java | 99 +++++++ .../resources/application.properties.template | 4 + .../src/main/resources/jython/GitLoad.py | 65 +++++ .../src/main/resources/jython/GitTransform.py | 81 ++++++ .../metadata/etl/git/GitMetadataEtlTest.java | 55 ++++ wherehows-common/build.gradle | 2 + .../main/java/wherehows/common/Constant.java | 4 + .../common/schemas/GitCommitRecord.java | 68 +++++ .../java/wherehows/common/utils/GitUtil.java | 256 ++++++++++++++++++ .../wherehows/common/utils/GitUtilTest.java | 41 +++ 13 files changed, 723 insertions(+) create mode 100644 data-model/DDL/ETL_DDL/git_metadata.sql create mode 100644 metadata-etl/src/main/java/metadata/etl/git/GitMetadataEtl.java create mode 100644 metadata-etl/src/main/resources/jython/GitLoad.py create mode 100644 metadata-etl/src/main/resources/jython/GitTransform.py create mode 100644 metadata-etl/src/test/java/metadata/etl/git/GitMetadataEtlTest.java create mode 100644 wherehows-common/src/main/java/wherehows/common/schemas/GitCommitRecord.java create mode 100644 wherehows-common/src/main/java/wherehows/common/utils/GitUtil.java create mode 100644 wherehows-common/src/test/java/wherehows/common/utils/GitUtilTest.java diff --git a/NOTICE b/NOTICE index 1cc1817767..39522ba9ba 100644 --- a/NOTICE +++ b/NOTICE @@ -54,3 +54,11 @@ License: Apache 2.0 This product includes/uses JsonPath (http://goessner.net/articles/JsonPath/) Copyright (c) 2011 the original author or authors License: Apache 2.0 + +This product includes/uses jsoup (http://jsoup.org/) +Copyright © 2009 - 2013 Jonathan Hedley (jonathan@hedley.net) +License: MIT + +This product includes/uses JGit (https://eclipse.org/jgit/) +Copyright (c) 2007, Eclipse Foundation, Inc. 
and its licensors +License: Eclipse Distribution License diff --git a/build.gradle b/build.gradle index ecae39e9e2..2b476498ce 100644 --- a/build.gradle +++ b/build.gradle @@ -50,6 +50,8 @@ subprojects { "hadoop_auth" : "org.apache.hadoop:hadoop-auth:2.7.1", "json_path" : "com.jayway.jsonpath:json-path:2.0.0", "akka" : "com.typesafe.akka:akka-actor_2.10:2.2.0", + "jgit" : "org.eclipse.jgit:org.eclipse.jgit:4.1.1.201511131810-r", + "jsoup" : "org.jsoup:jsoup:1.8.3", "jackson_databind" : "com.fasterxml.jackson.core:jackson-databind:2.6.1", "jackson_core" : "com.fasterxml.jackson.core:jackson-core:2.6.1", diff --git a/data-model/DDL/ETL_DDL/git_metadata.sql b/data-model/DDL/ETL_DDL/git_metadata.sql new file mode 100644 index 0000000000..df9bf86738 --- /dev/null +++ b/data-model/DDL/ETL_DDL/git_metadata.sql @@ -0,0 +1,38 @@ +CREATE TABLE `source_code_commit_info` ( + `app_id` SMALLINT(5) UNSIGNED DEFAULT NULL, + `repository_urn` VARCHAR(300) CHAR SET latin1 NOT NULL COMMENT 'the git repo urn', + `commit_id` VARCHAR(50) CHAR SET latin1 NOT NULL COMMENT 'the sha-1 hash of the commit', + `file_path` VARCHAR(600) CHAR SET latin1 NOT NULL COMMENT 'the path to the file', + `file_name` VARCHAR(127) NOT NULL COMMENT 'the file name', + `commit_time` INT UNSIGNED COMMENT 'the commit time', + `committer_name` VARCHAR(128) NOT NULL COMMENT 'name of the committer', + `committer_email` VARCHAR(128) DEFAULT NULL COMMENT 'email of the committer', + `author_name` VARCHAR(128) NOT NULL COMMENT 'name of the author', + `author_email` VARCHAR(128) NOT NULL COMMENT 'email of the author', + `message` VARCHAR(1024) NOT NULL COMMENT 'message of the commit', + `created_time` INT UNSIGNED COMMENT 'wherehows created time', + `modified_time` INT UNSIGNED COMMENT 'latest wherehows modified', + `wh_etl_exec_id` BIGINT COMMENT 'wherehows etl execution id that modified this record', + PRIMARY KEY (repository_urn, file_path, commit_id), + KEY (commit_id), + KEY (repository_urn, file_name, 
committer_email) +) ENGINE = InnoDB DEFAULT CHARSET = utf8; + +CREATE TABLE `stg_source_code_commit_info` ( + `app_id` SMALLINT(5) UNSIGNED DEFAULT NULL, + `repository_urn` VARCHAR(300) CHAR SET latin1 NOT NULL COMMENT 'the git repo urn', + `commit_id` VARCHAR(50) CHAR SET latin1 NOT NULL COMMENT 'the sha-1 hash of the commit', + `file_path` VARCHAR(600) CHAR SET latin1 NOT NULL COMMENT 'the path to the file', + `file_name` VARCHAR(127) NOT NULL COMMENT 'the file name', + `commit_time` INT UNSIGNED COMMENT 'the commit time', + `committer_name` VARCHAR(128) NOT NULL COMMENT 'name of the committer', + `committer_email` VARCHAR(128) DEFAULT NULL COMMENT 'email of the committer', + `author_name` VARCHAR(128) NOT NULL COMMENT 'name of the author', + `author_email` VARCHAR(128) NOT NULL COMMENT 'email of the author', + `message` VARCHAR(1024) NOT NULL COMMENT 'message of the commit', + `wh_etl_exec_id` BIGINT COMMENT 'wherehows etl execution id that modified this record', + PRIMARY KEY (repository_urn, file_path, commit_id), + KEY (commit_id), + KEY (repository_urn, file_name, committer_email) +) ENGINE = InnoDB DEFAULT CHARSET = utf8; + diff --git a/metadata-etl/src/main/java/metadata/etl/git/GitMetadataEtl.java b/metadata-etl/src/main/java/metadata/etl/git/GitMetadataEtl.java new file mode 100644 index 0000000000..927c741861 --- /dev/null +++ b/metadata-etl/src/main/java/metadata/etl/git/GitMetadataEtl.java @@ -0,0 +1,99 @@ +/** + * Copyright 2015 LinkedIn Corp. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ +package metadata.etl.git; + +import java.io.File; +import java.io.InputStream; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Properties; +import java.util.Set; +import metadata.etl.EtlJob; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import wherehows.common.Constant; +import wherehows.common.schemas.GitCommitRecord; +import wherehows.common.utils.GitUtil; +import wherehows.common.writers.FileWriter; + + +/** + * Created by zechen on 12/7/15. + */ +public class GitMetadataEtl extends EtlJob { + + public ClassLoader classLoader = getClass().getClassLoader(); + protected final Logger logger = LoggerFactory.getLogger(getClass()); + public static final String COMMIT_OUTPUT_FILE = "commit.csv"; + + public GitMetadataEtl(int appId, long whExecId) { + super(appId, null, whExecId); + } + + public GitMetadataEtl(int appId, long whExecId, Properties prop) { + super(appId, null, whExecId, prop); + } + + public void extract() throws Exception { + logger.info("git extract"); + String gitHost = this.prop.getProperty(Constant.GIT_HOST_KEY); + String[] projects = (this.prop.getProperty(Constant.GIT_PROJECT_WHITELIST_KEY)).trim().split("\\s*,\\s*"); + Set blackCommitters = new HashSet<>( + Arrays.asList(this.prop.getProperty(Constant.GIT_COMMITTER_BLACKLIST_KEY).trim().split("\\s*,\\s*"))); + + String localDir = this.prop.getProperty(Constant.WH_APP_FOLDER_KEY) + "/" + this.prop.getProperty(Constant.APP_ID_KEY); + File dir = new File(localDir); + if (!dir.exists()) { + if (!dir.mkdirs()) { + throw new Exception("can not create metadata directory"); + } + } + FileWriter fw = new FileWriter(localDir + "/" + COMMIT_OUTPUT_FILE); + for (String project : projects) { + List repos = GitUtil.getRepoListFromProject(GitUtil.getHttpsUrl(gitHost, project)); + for (String repo : repos) { + String repoUri = GitUtil.getGitUrl(gitHost, repo); + String repoDir = localDir + "/" + repo; + GitUtil.clone(repoUri, repoDir); + List 
commitMetadatas = GitUtil.getRepoMetadata(repoDir); + for (GitUtil.CommitMetadata m : commitMetadatas) { + fw.append(new GitCommitRecord(m, repoUri)); + } + } + } + fw.close(); + } + + @Override + public void transform() + throws Exception { + logger.info("git transform"); + // call a python script to do the transformation + InputStream inputStream = classLoader.getResourceAsStream("jython/GitTransform.py"); + interpreter.execfile(inputStream); + inputStream.close(); + } + + @Override + public void load() + throws Exception { + logger.info("ldap db load"); + InputStream inputStream = classLoader.getResourceAsStream("jython/GitLoad.py"); + interpreter.execfile(inputStream); + inputStream.close(); + } + + +} diff --git a/metadata-etl/src/main/resources/application.properties.template b/metadata-etl/src/main/resources/application.properties.template index ce7f50b144..7e3504bc9f 100644 --- a/metadata-etl/src/main/resources/application.properties.template +++ b/metadata-etl/src/main/resources/application.properties.template @@ -102,3 +102,7 @@ ldap.group.context.security.credentials= ldap.group.search.domains= ldap.group.search.return.attributes= +# git +git.host= +git.project.whitelist= + diff --git a/metadata-etl/src/main/resources/jython/GitLoad.py b/metadata-etl/src/main/resources/jython/GitLoad.py new file mode 100644 index 0000000000..bba2832f40 --- /dev/null +++ b/metadata-etl/src/main/resources/jython/GitLoad.py @@ -0,0 +1,65 @@ +# +# Copyright 2015 LinkedIn Corp. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# + +__author__ = 'zechen' + +from wherehows.common import Constant +from com.ziclix.python.sql import zxJDBC +import sys + + +class GitLoad: + + def __init__(self, args): + self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY], + args[Constant.WH_DB_USERNAME_KEY], + args[Constant.WH_DB_PASSWORD_KEY], + args[Constant.WH_DB_DRIVER_KEY]) + self.wh_cursor = self.wh_con.cursor() + self.app_id = int(args[Constant.APP_ID_KEY]) + + def run(self): + self.load_from_stg() + self.wh_cursor.close() + self.wh_con.close() + + def load_from_stg(self): + query = """ + INSERT INTO source_code_commit_info + ( + app_id, repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email, + author_name, author_email, message, created_time, wh_etl_exec_id + ) + select app_id, repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email, + author_name, author_email, message, unix_timestamp(NOW()), wh_etl_exec_id + from stg_source_code_commit_info s + where s.app_id = {app_id} + on duplicate key update + commit_time = s.commit_time, + committer_name = s.committer_name, + committer_email = s.committer_email, + author_name = s.author_name, + author_email = s.author_email, + message = s.message, + modified_time = unix_timestamp(NOW()), + wh_etl_exec_id = s.wh_etl_exec_id + """.format(app_id=self.app_id) + print query + self.wh_cursor.execute(query) + self.wh_con.commit() + +if __name__ == "__main__": + props = sys.argv[1] + git = GitLoad(props) + git.run() diff --git a/metadata-etl/src/main/resources/jython/GitTransform.py b/metadata-etl/src/main/resources/jython/GitTransform.py new file mode 100644 index 0000000000..23da6ec9da --- /dev/null +++ b/metadata-etl/src/main/resources/jython/GitTransform.py @@ -0,0 +1,81 @@ +# +# Copyright 2015 LinkedIn Corp. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# + +__author__ = 'zechen' + +from wherehows.common import Constant +from com.ziclix.python.sql import zxJDBC +import sys + + +class OwnerTransform: + _tables = {"source_code_commit": {"columns": "repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email, author_name, author_email, message", + "file": "commit.csv", + "table": "stg_source_code_commit_info"} + } + + _clear_staging_tempalte = """ + DELETE FROM {table} + """ + + _read_file_template = """ + LOAD DATA LOCAL INFILE '{folder}/{file}' + INTO TABLE {table} + FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0' + LINES TERMINATED BY '\n' + ({columns}) + SET app_id = {app_id}, + wh_etl_exec_id = {wh_etl_exec_id}; + """ + + def __init__(self, args): + self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY], + args[Constant.WH_DB_USERNAME_KEY], + args[Constant.WH_DB_PASSWORD_KEY], + args[Constant.WH_DB_DRIVER_KEY]) + self.wh_cursor = self.wh_con.cursor() + self.app_id = int(args[Constant.APP_ID_KEY]) + self.wh_etl_exec_id = int(args[Constant.WH_EXEC_ID_KEY]) + self.app_folder = args[Constant.WH_APP_FOLDER_KEY] + self.metadata_folder = self.app_folder + "/" + str(self.app_id) + + def run(self): + self.read_file_to_stg() + self.wh_cursor.close() + self.wh_con.close() + + def read_file_to_stg(self): + t = self._tables["source_code_commit"] + + # Clear stagging table + query = self._clear_staging_tempalte.format(table=t.get("table")) + print query + self.wh_cursor.execute(query) + self.wh_con.commit() + + # Load file into stagging table + query = self._read_file_template.format(folder=self.metadata_folder, + file=t.get("file"), + table=t.get("table"), + columns=t.get("columns"), + 
app_id=self.app_id, + wh_etl_exec_id=self.wh_etl_exec_id) + print query + self.wh_cursor.execute(query) + self.wh_con.commit() + +if __name__ == "__main__": + props = sys.argv[1] + ot = OwnerTransform(props) + ot.run() diff --git a/metadata-etl/src/test/java/metadata/etl/git/GitMetadataEtlTest.java b/metadata-etl/src/test/java/metadata/etl/git/GitMetadataEtlTest.java new file mode 100644 index 0000000000..0687244975 --- /dev/null +++ b/metadata-etl/src/test/java/metadata/etl/git/GitMetadataEtlTest.java @@ -0,0 +1,55 @@ +/** + * Copyright 2015 LinkedIn Corp. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +package metadata.etl.git; + +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +/** + * Created by zechen on 12/8/15. 
+ */
+// Smoke tests for GitMetadataEtl: each test only invokes one phase and passes when no exception
+// escapes; nothing is asserted about the produced data.
+// NOTE(review): presumably these need a configured git host and WhereHows database — confirm
+// before enabling them in an unattended build.
+public class GitMetadataEtlTest {
+  GitMetadataEtl git;
+
+  /** Creates a fresh job with app id 500 and execution id 0 before every test. */
+  @BeforeMethod
+  public void setUp()
+      throws Exception {
+    this.git = new GitMetadataEtl(500, 0L);
+  }
+
+  /** Runs only the extract phase. */
+  @Test
+  public void testExtract()
+      throws Exception {
+    git.extract();
+  }
+
+  /** Runs only the transform phase. */
+  @Test
+  public void testTransform()
+      throws Exception {
+    git.transform();
+  }
+
+  /** Runs only the load phase. */
+  @Test
+  public void testLoad()
+      throws Exception {
+    git.load();
+  }
+
+  /** Invokes the job's run() entry point. */
+  @Test
+  public void testRun()
+      throws Exception {
+    git.run();
+  }
+}
\ No newline at end of file
diff --git a/wherehows-common/build.gradle b/wherehows-common/build.gradle
index 58c9aa4047..2e8baed9a7 100644
--- a/wherehows-common/build.gradle
+++ b/wherehows-common/build.gradle
@@ -5,6 +5,8 @@ dependencies {
   compile externalDependency.slf4j_api
   compile externalDependency.slf4j_log4j
   compile externalDependency.spring_jdbc
+  compile externalDependency.jgit
+  compile externalDependency.jsoup
   testCompile externalDependency.testng
   testCompile project(":metadata-etl")
 }
diff --git a/wherehows-common/src/main/java/wherehows/common/Constant.java b/wherehows-common/src/main/java/wherehows/common/Constant.java
index c3fba3d855..4d80e23ad0 100644
--- a/wherehows-common/src/main/java/wherehows/common/Constant.java
+++ b/wherehows-common/src/main/java/wherehows/common/Constant.java
@@ -102,4 +102,8 @@ public class Constant {
   public static final String LDAP_GROUP_SEARCH_DOMAINS_KEY = "ldap.group.search.domains";
   public static final String LDAP_GROUP_SEARCH_RETURN_ATTRS_KEY = "ldap.group.search.return.attributes";
 
+  // git
+  public static final String GIT_HOST_KEY = "git.host";
+  public static final String GIT_PROJECT_WHITELIST_KEY = "git.project.whitelist";
+
 }
diff --git a/wherehows-common/src/main/java/wherehows/common/schemas/GitCommitRecord.java b/wherehows-common/src/main/java/wherehows/common/schemas/GitCommitRecord.java
new file mode 100644
index 0000000000..04a4104374
--- /dev/null
+++ b/wherehows-common/src/main/java/wherehows/common/schemas/GitCommitRecord.java
@@ -0,0 +1,68 @@ +/** + * Copyright 2015 LinkedIn Corp. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +package wherehows.common.schemas; + +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.io.FilenameUtils; +import wherehows.common.utils.GitUtil; + + +/** + * Created by zechen on 12/8/15. + */ +public class GitCommitRecord extends AbstractRecord { + String gitRepoUrn; + String commitId; + String filePath; + String fileName; + Long commitTime; + String committerName; + String committerEmail; + String authorName; + String authorEmail; + String message; + + public GitCommitRecord() { + } + + public GitCommitRecord(GitUtil.CommitMetadata commitMetadata, String gitRepoUrn) { + this.gitRepoUrn = gitRepoUrn; + this.commitId = commitMetadata.getCommitId(); + this.filePath = commitMetadata.getFilePath(); + this.fileName = FilenameUtils.getName(this.filePath); + this.commitTime = commitMetadata.getCommitTime().getTime() / 1000; + this.committerName = commitMetadata.getCommitter(); + this.committerEmail = commitMetadata.getCommitterEmail(); + this.authorName = commitMetadata.getAuthor(); + this.authorEmail = commitMetadata.getAuthorEmail(); + this.message = commitMetadata.getMessage(); + } + + @Override + public List fillAllFields() { + List allFields = new ArrayList<>(); + allFields.add(gitRepoUrn); + allFields.add(commitId); + allFields.add(filePath); + allFields.add(fileName); + allFields.add(commitTime); + allFields.add(committerName); + allFields.add(committerEmail); + allFields.add(authorName); + 
allFields.add(authorEmail); + allFields.add(message); + return allFields; + } +} diff --git a/wherehows-common/src/main/java/wherehows/common/utils/GitUtil.java b/wherehows-common/src/main/java/wherehows/common/utils/GitUtil.java new file mode 100644 index 0000000000..3a79870fd5 --- /dev/null +++ b/wherehows-common/src/main/java/wherehows/common/utils/GitUtil.java @@ -0,0 +1,256 @@ +/** + * Copyright 2015 LinkedIn Corp. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +package wherehows.common.utils; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.LinkedList; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.PersonIdent; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.storage.file.FileRepositoryBuilder; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Created by abhattac on 2/5/15, + * Modified by zechen on 12/8/15. 
+ */ + +public class GitUtil { + private static final Logger logger = LoggerFactory.getLogger(GitUtil.class); + private final static String DEFAULT_HOST = "gitli.corp.linkedin.com"; + public static final String HTTPS_PROTOCAL = "https"; + public static final String GIT_PROTOCAL = "git"; + public static final String GIT_SUBFIX = ".git"; + + /** + * Cloning the remote git repo to local directory + * @param remoteUri remote git url e.g. git://gitli.example.com/project/repo.git + * @param localDir local destination clone directory + * @throws IOException + * @throws GitAPIException + */ + public static void clone(String remoteUri, String localDir) throws IOException, GitAPIException { + //create local git directory + File localGitRepo = new File(localDir); + if (localGitRepo.exists()) { + if (localGitRepo.isDirectory()) { + // clean up directory + FileUtils.cleanDirectory(localGitRepo); + } else { + throw new IOException("File exists: " + localDir); + } + } else { + localGitRepo.mkdirs(); + } + + Git g = Git.cloneRepository().setURI(remoteUri).setDirectory(localGitRepo).call(); + g.close(); + } + + /** + * Crawlling the project page to get list of repositories, only works for Gitorious + * @param projectUrl the project url e.g. https://git.example.com/project + * @return List of path of repositories e.g. 
project/repo + * @throws IOException + */ + public static List getRepoListFromProject(String projectUrl) throws IOException { + + List repoList = new LinkedList<>(); + Document doc = Jsoup.connect(projectUrl).get(); + Elements repos = doc.getElementsByClass("repository"); + + for (Element e : repos) { + String repo = e.children().first().text(); + repoList.add(repo.trim()); + } + + return repoList; + } + + /** + * Fetch all commit metadata from the repo + * @param repoDir repository directory + * @return list of commit metadata + * @throws IOException + * @throws GitAPIException + */ + public static List getRepoMetadata(String repoDir) throws IOException, GitAPIException { + + List metadataList = new ArrayList<>(); + + FileRepositoryBuilder builder = new FileRepositoryBuilder(); + Repository repository = builder.setGitDir(new File(repoDir, ".git")).readEnvironment().findGitDir().build(); + + // Current branch may not be master. Instead of hard coding determine the current branch + String currentBranch = repository.getBranch(); + Ref head = repository.getRef("refs/heads/" + currentBranch); // current branch may not be "master" + if (head == null) { + return metadataList; + } + + Git git = new Git(repository); + + RevWalk walk = new RevWalk(repository); + RevCommit commit = walk.parseCommit(head.getObjectId()); + + TreeWalk treeWalk = new TreeWalk(repository); + treeWalk.addTree(commit.getTree()); + treeWalk.setRecursive(true); + while (treeWalk.next()) { + String filePath = treeWalk.getPathString(); + Iterable commitLog = git.log().add(repository.resolve(Constants.HEAD)).addPath(filePath).call(); + for (RevCommit r : commitLog) { + CommitMetadata metadata = new CommitMetadata(r.getName()); + metadata.setFilePath(filePath); + metadata.setMessage(r.getShortMessage().trim()); + // Difference between committer and author + // refer to: http://git-scm.com/book/ch2-3.html + PersonIdent committer = r.getCommitterIdent(); + PersonIdent author = r.getAuthorIdent(); + 
metadata.setAuthor(author.getName()); + metadata.setAuthorEmail(author.getEmailAddress()); + metadata.setCommitter(committer.getName()); + metadata.setCommitterEmail(committer.getEmailAddress()); + metadata.setCommitTime(committer.getWhen()); + metadataList.add(metadata); + } + } + git.close(); + return metadataList; + } + + public static String getHttpsUrl(String host, String path) { + return HTTPS_PROTOCAL + "://" + host + "/" + path; + } + + public static String getGitUrl(String host, String path) { + return GIT_PROTOCAL + "://" + host + "/" + path + GIT_SUBFIX; + } + + public static String getSshUrl(String host, String path) { + return GIT_PROTOCAL + "@" + host + ":" + path; + } + + public static class CommitMetadata { + String commitId; + String author; + String committer; + Date commitTime; + String message; + String committerEmail; + String authorEmail; + String filePath; + + public CommitMetadata() { + } + + public CommitMetadata(String commitId) { + this.commitId = commitId; + } + + public CommitMetadata(String commitId, String author, String committer, Date commitTime, String message, + String committerEmail, String authorEmail, String filePath) { + this.commitId = commitId; + this.author = author; + this.committer = committer; + this.commitTime = commitTime; + this.message = message; + this.committerEmail = committerEmail; + this.authorEmail = authorEmail; + this.filePath = filePath; + } + + public String getCommitId() { + return commitId; + } + + public void setCommitId(String commitId) { + this.commitId = commitId; + } + + public String getAuthor() { + return author; + } + + public void setAuthor(String author) { + this.author = author; + } + + public String getCommitter() { + return committer; + } + + public void setCommitter(String committer) { + this.committer = committer; + } + + public Date getCommitTime() { + return commitTime; + } + + public void setCommitTime(Date commitTime) { + this.commitTime = commitTime; + } + + public String getMessage() 
{ + return message; + } + + public void setMessage(String message) { + this.message = message; + } + + public String getCommitterEmail() { + return committerEmail; + } + + public void setCommitterEmail(String committerEmail) { + this.committerEmail = committerEmail; + } + + public String getAuthorEmail() { + return authorEmail; + } + + public void setAuthorEmail(String authorEmail) { + this.authorEmail = authorEmail; + } + + public String getFilePath() { + return filePath; + } + + public void setFilePath(String filePath) { + this.filePath = filePath; + } + } + +} + diff --git a/wherehows-common/src/test/java/wherehows/common/utils/GitUtilTest.java b/wherehows-common/src/test/java/wherehows/common/utils/GitUtilTest.java new file mode 100644 index 0000000000..5302a7cb21 --- /dev/null +++ b/wherehows-common/src/test/java/wherehows/common/utils/GitUtilTest.java @@ -0,0 +1,41 @@ +/** + * Copyright 2015 LinkedIn Corp. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ +package wherehows.common.utils; + +import org.testng.annotations.Test; + +/** + * Created by zechen on 12/8/15. 
+ */
+// Placeholder tests for GitUtil: every body below is commented out, so each test currently
+// passes without exercising any code.
+public class GitUtilTest {
+
+  /** Disabled: the commented call would clone a remote repository into /tmp/project/repo. */
+  @Test
+  public void testClone()
+      throws Exception {
+    //GitUtil.clone("git://git.example.com/project/repo.git", "/tmp/project/repo");
+  }
+
+  /**
+   * Disabled: the commented call would list the repositories of a project page.
+   * NOTE(review): the sample URL uses git://, while getRepoListFromProject documents an
+   * https project URL — confirm the scheme before enabling.
+   */
+  @Test
+  public void testGetRepoListFromProject()
+      throws Exception {
+    //List repos = GitUtil.getRepoListFromProject("git://git.example.com/project");
+    //Assert.assertTrue(repos.size() > 0);
+  }
+
+  /** Disabled: the commented call would read metadata from the clone at /tmp/project/repo. */
+  @Test
+  public void testGetRepoMetadata()
+      throws Exception {
+    //GitUtil.getRepoMetadata("/tmp/project/repo");
+  }
+}
\ No newline at end of file