mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-25 17:08:29 +00:00
add git file commit history etl
This commit is contained in:
parent
ebbf9ec629
commit
af04ff6efc
8
NOTICE
8
NOTICE
@ -54,3 +54,11 @@ License: Apache 2.0
|
||||
This product includes/uses JsonPath (http://goessner.net/articles/JsonPath/)
|
||||
Copyright (c) 2011 the original author or authors
|
||||
License: Apache 2.0
|
||||
|
||||
This product includes/uses jsoup (http://jsoup.org/)
|
||||
Copyright © 2009 - 2013 Jonathan Hedley (jonathan@hedley.net)
|
||||
License: MIT
|
||||
|
||||
This product includes/uses JGit (https://eclipse.org/jgit/)
|
||||
Copyright (c) 2007, Eclipse Foundation, Inc. and its licensors
|
||||
License: Eclipse Distribution License
|
||||
|
||||
@ -50,6 +50,8 @@ subprojects {
|
||||
"hadoop_auth" : "org.apache.hadoop:hadoop-auth:2.7.1",
|
||||
"json_path" : "com.jayway.jsonpath:json-path:2.0.0",
|
||||
"akka" : "com.typesafe.akka:akka-actor_2.10:2.2.0",
|
||||
"jgit" : "org.eclipse.jgit:org.eclipse.jgit:4.1.1.201511131810-r",
|
||||
"jsoup" : "org.jsoup:jsoup:1.8.3",
|
||||
|
||||
"jackson_databind" : "com.fasterxml.jackson.core:jackson-databind:2.6.1",
|
||||
"jackson_core" : "com.fasterxml.jackson.core:jackson-core:2.6.1",
|
||||
|
||||
38
data-model/DDL/ETL_DDL/git_metadata.sql
Normal file
38
data-model/DDL/ETL_DDL/git_metadata.sql
Normal file
@ -0,0 +1,38 @@
|
||||
-- Final table of git commit history.
-- One row per (repository_urn, file_path, commit_id), as enforced by the primary key.
CREATE TABLE `source_code_commit_info` (
  `app_id`          SMALLINT(5) UNSIGNED DEFAULT NULL,
  `repository_urn`  VARCHAR(300) CHARACTER SET latin1 NOT NULL COMMENT 'the git repo urn',
  `commit_id`       VARCHAR(50) CHARACTER SET latin1 NOT NULL COMMENT 'the sha-1 hash of the commit',
  `file_path`       VARCHAR(600) CHARACTER SET latin1 NOT NULL COMMENT 'the path to the file',
  `file_name`       VARCHAR(127) NOT NULL COMMENT 'the file name',
  `commit_time`     INT UNSIGNED COMMENT 'the commit time',
  `committer_name`  VARCHAR(128) NOT NULL COMMENT 'name of the committer',
  `committer_email` VARCHAR(128) DEFAULT NULL COMMENT 'email of the committer',
  `author_name`     VARCHAR(128) NOT NULL COMMENT 'name of the author',
  `author_email`    VARCHAR(128) NOT NULL COMMENT 'email of the author',
  `message`         VARCHAR(1024) NOT NULL COMMENT 'message of the commit',
  -- bookkeeping columns maintained by the ETL load step
  `created_time`    INT UNSIGNED COMMENT 'wherehows created time',
  `modified_time`   INT UNSIGNED COMMENT 'latest wherehows modified',
  `wh_etl_exec_id`  BIGINT COMMENT 'wherehows etl execution id that modified this record',
  PRIMARY KEY (repository_urn, file_path, commit_id),
  -- secondary indexes: lookup by commit, and by repository / file name / committer
  KEY (commit_id),
  KEY (repository_urn, file_name, committer_email)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
|
||||
|
||||
-- Staging table for git commit history.
-- Same columns as source_code_commit_info, minus created_time / modified_time.
CREATE TABLE `stg_source_code_commit_info` (
  `app_id`          SMALLINT(5) UNSIGNED DEFAULT NULL,
  `repository_urn`  VARCHAR(300) CHARACTER SET latin1 NOT NULL COMMENT 'the git repo urn',
  `commit_id`       VARCHAR(50) CHARACTER SET latin1 NOT NULL COMMENT 'the sha-1 hash of the commit',
  `file_path`       VARCHAR(600) CHARACTER SET latin1 NOT NULL COMMENT 'the path to the file',
  `file_name`       VARCHAR(127) NOT NULL COMMENT 'the file name',
  `commit_time`     INT UNSIGNED COMMENT 'the commit time',
  `committer_name`  VARCHAR(128) NOT NULL COMMENT 'name of the committer',
  `committer_email` VARCHAR(128) DEFAULT NULL COMMENT 'email of the committer',
  `author_name`     VARCHAR(128) NOT NULL COMMENT 'name of the author',
  `author_email`    VARCHAR(128) NOT NULL COMMENT 'email of the author',
  `message`         VARCHAR(1024) NOT NULL COMMENT 'message of the commit',
  `wh_etl_exec_id`  BIGINT COMMENT 'wherehows etl execution id that modified this record',
  PRIMARY KEY (repository_urn, file_path, commit_id),
  -- secondary indexes mirror the final table
  KEY (commit_id),
  KEY (repository_urn, file_name, committer_email)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
|
||||
|
||||
@ -0,0 +1,99 @@
|
||||
/**
|
||||
* Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
*/
|
||||
package metadata.etl.git;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import metadata.etl.EtlJob;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import wherehows.common.Constant;
|
||||
import wherehows.common.schemas.GitCommitRecord;
|
||||
import wherehows.common.utils.GitUtil;
|
||||
import wherehows.common.writers.FileWriter;
|
||||
|
||||
|
||||
/**
|
||||
* Created by zechen on 12/7/15.
|
||||
*/
|
||||
public class GitMetadataEtl extends EtlJob {
|
||||
|
||||
public ClassLoader classLoader = getClass().getClassLoader();
|
||||
protected final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
public static final String COMMIT_OUTPUT_FILE = "commit.csv";
|
||||
|
||||
public GitMetadataEtl(int appId, long whExecId) {
|
||||
super(appId, null, whExecId);
|
||||
}
|
||||
|
||||
public GitMetadataEtl(int appId, long whExecId, Properties prop) {
|
||||
super(appId, null, whExecId, prop);
|
||||
}
|
||||
|
||||
public void extract() throws Exception {
|
||||
logger.info("git extract");
|
||||
String gitHost = this.prop.getProperty(Constant.GIT_HOST_KEY);
|
||||
String[] projects = (this.prop.getProperty(Constant.GIT_PROJECT_WHITELIST_KEY)).trim().split("\\s*,\\s*");
|
||||
Set<String> blackCommitters = new HashSet<>(
|
||||
Arrays.asList(this.prop.getProperty(Constant.GIT_COMMITTER_BLACKLIST_KEY).trim().split("\\s*,\\s*")));
|
||||
|
||||
String localDir = this.prop.getProperty(Constant.WH_APP_FOLDER_KEY) + "/" + this.prop.getProperty(Constant.APP_ID_KEY);
|
||||
File dir = new File(localDir);
|
||||
if (!dir.exists()) {
|
||||
if (!dir.mkdirs()) {
|
||||
throw new Exception("can not create metadata directory");
|
||||
}
|
||||
}
|
||||
FileWriter fw = new FileWriter(localDir + "/" + COMMIT_OUTPUT_FILE);
|
||||
for (String project : projects) {
|
||||
List<String> repos = GitUtil.getRepoListFromProject(GitUtil.getHttpsUrl(gitHost, project));
|
||||
for (String repo : repos) {
|
||||
String repoUri = GitUtil.getGitUrl(gitHost, repo);
|
||||
String repoDir = localDir + "/" + repo;
|
||||
GitUtil.clone(repoUri, repoDir);
|
||||
List<GitUtil.CommitMetadata> commitMetadatas = GitUtil.getRepoMetadata(repoDir);
|
||||
for (GitUtil.CommitMetadata m : commitMetadatas) {
|
||||
fw.append(new GitCommitRecord(m, repoUri));
|
||||
}
|
||||
}
|
||||
}
|
||||
fw.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform()
|
||||
throws Exception {
|
||||
logger.info("git transform");
|
||||
// call a python script to do the transformation
|
||||
InputStream inputStream = classLoader.getResourceAsStream("jython/GitTransform.py");
|
||||
interpreter.execfile(inputStream);
|
||||
inputStream.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void load()
|
||||
throws Exception {
|
||||
logger.info("ldap db load");
|
||||
InputStream inputStream = classLoader.getResourceAsStream("jython/GitLoad.py");
|
||||
interpreter.execfile(inputStream);
|
||||
inputStream.close();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@ -102,3 +102,7 @@ ldap.group.context.security.credentials=
|
||||
ldap.group.search.domains=
|
||||
ldap.group.search.return.attributes=
|
||||
|
||||
# git
|
||||
git.host=
|
||||
git.project.whitelist=
|
||||
|
||||
|
||||
65
metadata-etl/src/main/resources/jython/GitLoad.py
Normal file
65
metadata-etl/src/main/resources/jython/GitLoad.py
Normal file
@ -0,0 +1,65 @@
|
||||
#
|
||||
# Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#
|
||||
|
||||
__author__ = 'zechen'
|
||||
|
||||
from wherehows.common import Constant
|
||||
from com.ziclix.python.sql import zxJDBC
|
||||
import sys
|
||||
|
||||
|
||||
class GitLoad:
|
||||
|
||||
def __init__(self, args):
|
||||
self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
|
||||
args[Constant.WH_DB_USERNAME_KEY],
|
||||
args[Constant.WH_DB_PASSWORD_KEY],
|
||||
args[Constant.WH_DB_DRIVER_KEY])
|
||||
self.wh_cursor = self.wh_con.cursor()
|
||||
self.app_id = int(args[Constant.APP_ID_KEY])
|
||||
|
||||
def run(self):
|
||||
self.load_from_stg()
|
||||
self.wh_cursor.close()
|
||||
self.wh_con.close()
|
||||
|
||||
def load_from_stg(self):
|
||||
query = """
|
||||
INSERT INTO source_code_commit_info
|
||||
(
|
||||
app_id, repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email,
|
||||
author_name, author_email, message, created_time, wh_etl_exec_id
|
||||
)
|
||||
select app_id, repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email,
|
||||
author_name, author_email, message, unix_timestamp(NOW()), wh_etl_exec_id
|
||||
from stg_source_code_commit_info s
|
||||
where s.app_id = {app_id}
|
||||
on duplicate key update
|
||||
commit_time = s.commit_time,
|
||||
committer_name = s.committer_name,
|
||||
committer_email = s.committer_email,
|
||||
author_name = s.author_name,
|
||||
author_email = s.author_email,
|
||||
message = s.message,
|
||||
modified_time = unix_timestamp(NOW()),
|
||||
wh_etl_exec_id = s.wh_etl_exec_id
|
||||
""".format(app_id=self.app_id)
|
||||
print query
|
||||
self.wh_cursor.execute(query)
|
||||
self.wh_con.commit()
|
||||
|
||||
if __name__ == "__main__":
    # The second argv entry is passed straight to GitLoad, which indexes it by Constant keys.
    props = sys.argv[1]
    git = GitLoad(props)
    git.run()
|
||||
81
metadata-etl/src/main/resources/jython/GitTransform.py
Normal file
81
metadata-etl/src/main/resources/jython/GitTransform.py
Normal file
@ -0,0 +1,81 @@
|
||||
#
|
||||
# Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#
|
||||
|
||||
__author__ = 'zechen'
|
||||
|
||||
from wherehows.common import Constant
|
||||
from com.ziclix.python.sql import zxJDBC
|
||||
import sys
|
||||
|
||||
|
||||
class OwnerTransform:
|
||||
_tables = {"source_code_commit": {"columns": "repository_urn, commit_id, file_path, file_name, commit_time, committer_name, committer_email, author_name, author_email, message",
|
||||
"file": "commit.csv",
|
||||
"table": "stg_source_code_commit_info"}
|
||||
}
|
||||
|
||||
_clear_staging_tempalte = """
|
||||
DELETE FROM {table}
|
||||
"""
|
||||
|
||||
_read_file_template = """
|
||||
LOAD DATA LOCAL INFILE '{folder}/{file}'
|
||||
INTO TABLE {table}
|
||||
FIELDS TERMINATED BY '\x1a' ESCAPED BY '\0'
|
||||
LINES TERMINATED BY '\n'
|
||||
({columns})
|
||||
SET app_id = {app_id},
|
||||
wh_etl_exec_id = {wh_etl_exec_id};
|
||||
"""
|
||||
|
||||
def __init__(self, args):
|
||||
self.wh_con = zxJDBC.connect(args[Constant.WH_DB_URL_KEY],
|
||||
args[Constant.WH_DB_USERNAME_KEY],
|
||||
args[Constant.WH_DB_PASSWORD_KEY],
|
||||
args[Constant.WH_DB_DRIVER_KEY])
|
||||
self.wh_cursor = self.wh_con.cursor()
|
||||
self.app_id = int(args[Constant.APP_ID_KEY])
|
||||
self.wh_etl_exec_id = int(args[Constant.WH_EXEC_ID_KEY])
|
||||
self.app_folder = args[Constant.WH_APP_FOLDER_KEY]
|
||||
self.metadata_folder = self.app_folder + "/" + str(self.app_id)
|
||||
|
||||
def run(self):
|
||||
self.read_file_to_stg()
|
||||
self.wh_cursor.close()
|
||||
self.wh_con.close()
|
||||
|
||||
def read_file_to_stg(self):
|
||||
t = self._tables["source_code_commit"]
|
||||
|
||||
# Clear stagging table
|
||||
query = self._clear_staging_tempalte.format(table=t.get("table"))
|
||||
print query
|
||||
self.wh_cursor.execute(query)
|
||||
self.wh_con.commit()
|
||||
|
||||
# Load file into stagging table
|
||||
query = self._read_file_template.format(folder=self.metadata_folder,
|
||||
file=t.get("file"),
|
||||
table=t.get("table"),
|
||||
columns=t.get("columns"),
|
||||
app_id=self.app_id,
|
||||
wh_etl_exec_id=self.wh_etl_exec_id)
|
||||
print query
|
||||
self.wh_cursor.execute(query)
|
||||
self.wh_con.commit()
|
||||
|
||||
if __name__ == "__main__":
    # The second argv entry is passed straight to OwnerTransform, which indexes it by Constant keys.
    props = sys.argv[1]
    ot = OwnerTransform(props)
    ot.run()
|
||||
@ -0,0 +1,55 @@
|
||||
/**
|
||||
* Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
*/
|
||||
package metadata.etl.git;
|
||||
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
/**
|
||||
* Created by zechen on 12/8/15.
|
||||
*/
|
||||
public class GitMetadataEtlTest {
|
||||
GitMetadataEtl git;
|
||||
|
||||
@BeforeMethod
|
||||
public void setUp()
|
||||
throws Exception {
|
||||
this.git = new GitMetadataEtl(500, 0L);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtract()
|
||||
throws Exception {
|
||||
git.extract();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTransform()
|
||||
throws Exception {
|
||||
git.transform();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoad()
|
||||
throws Exception {
|
||||
git.load();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRun()
|
||||
throws Exception {
|
||||
git.run();
|
||||
}
|
||||
}
|
||||
@ -5,6 +5,8 @@ dependencies {
|
||||
compile externalDependency.slf4j_api
|
||||
compile externalDependency.slf4j_log4j
|
||||
compile externalDependency.spring_jdbc
|
||||
compile externalDependency.jgit
|
||||
compile externalDependency.jsoup
|
||||
testCompile externalDependency.testng
|
||||
testCompile project(":metadata-etl")
|
||||
}
|
||||
|
||||
@ -102,4 +102,8 @@ public class Constant {
|
||||
public static final String LDAP_GROUP_SEARCH_DOMAINS_KEY = "ldap.group.search.domains";
|
||||
public static final String LDAP_GROUP_SEARCH_RETURN_ATTRS_KEY = "ldap.group.search.return.attributes";
|
||||
|
||||
// git
|
||||
public static final String GIT_HOST_KEY = "git.host";
|
||||
public static final String GIT_PROJECT_WHITELIST_KEY = "git.project.whitelist";
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,68 @@
|
||||
/**
|
||||
* Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
*/
|
||||
package wherehows.common.schemas;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import wherehows.common.utils.GitUtil;
|
||||
|
||||
|
||||
/**
|
||||
* Created by zechen on 12/8/15.
|
||||
*/
|
||||
public class GitCommitRecord extends AbstractRecord {
|
||||
String gitRepoUrn;
|
||||
String commitId;
|
||||
String filePath;
|
||||
String fileName;
|
||||
Long commitTime;
|
||||
String committerName;
|
||||
String committerEmail;
|
||||
String authorName;
|
||||
String authorEmail;
|
||||
String message;
|
||||
|
||||
public GitCommitRecord() {
|
||||
}
|
||||
|
||||
public GitCommitRecord(GitUtil.CommitMetadata commitMetadata, String gitRepoUrn) {
|
||||
this.gitRepoUrn = gitRepoUrn;
|
||||
this.commitId = commitMetadata.getCommitId();
|
||||
this.filePath = commitMetadata.getFilePath();
|
||||
this.fileName = FilenameUtils.getName(this.filePath);
|
||||
this.commitTime = commitMetadata.getCommitTime().getTime() / 1000;
|
||||
this.committerName = commitMetadata.getCommitter();
|
||||
this.committerEmail = commitMetadata.getCommitterEmail();
|
||||
this.authorName = commitMetadata.getAuthor();
|
||||
this.authorEmail = commitMetadata.getAuthorEmail();
|
||||
this.message = commitMetadata.getMessage();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Object> fillAllFields() {
|
||||
List<Object> allFields = new ArrayList<>();
|
||||
allFields.add(gitRepoUrn);
|
||||
allFields.add(commitId);
|
||||
allFields.add(filePath);
|
||||
allFields.add(fileName);
|
||||
allFields.add(commitTime);
|
||||
allFields.add(committerName);
|
||||
allFields.add(committerEmail);
|
||||
allFields.add(authorName);
|
||||
allFields.add(authorEmail);
|
||||
allFields.add(message);
|
||||
return allFields;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,256 @@
|
||||
/**
|
||||
* Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
*/
|
||||
package wherehows.common.utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.eclipse.jgit.api.Git;
|
||||
import org.eclipse.jgit.api.errors.GitAPIException;
|
||||
import org.eclipse.jgit.lib.Constants;
|
||||
import org.eclipse.jgit.lib.PersonIdent;
|
||||
import org.eclipse.jgit.lib.Ref;
|
||||
import org.eclipse.jgit.lib.Repository;
|
||||
import org.eclipse.jgit.revwalk.RevCommit;
|
||||
import org.eclipse.jgit.revwalk.RevWalk;
|
||||
import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
|
||||
import org.eclipse.jgit.treewalk.TreeWalk;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Created by abhattac on 2/5/15,
|
||||
* Modified by zechen on 12/8/15.
|
||||
*/
|
||||
|
||||
public class GitUtil {
|
||||
private static final Logger logger = LoggerFactory.getLogger(GitUtil.class);
|
||||
private final static String DEFAULT_HOST = "gitli.corp.linkedin.com";
|
||||
public static final String HTTPS_PROTOCAL = "https";
|
||||
public static final String GIT_PROTOCAL = "git";
|
||||
public static final String GIT_SUBFIX = ".git";
|
||||
|
||||
/**
|
||||
* Cloning the remote git repo to local directory
|
||||
* @param remoteUri remote git url e.g. git://gitli.example.com/project/repo.git
|
||||
* @param localDir local destination clone directory
|
||||
* @throws IOException
|
||||
* @throws GitAPIException
|
||||
*/
|
||||
public static void clone(String remoteUri, String localDir) throws IOException, GitAPIException {
|
||||
//create local git directory
|
||||
File localGitRepo = new File(localDir);
|
||||
if (localGitRepo.exists()) {
|
||||
if (localGitRepo.isDirectory()) {
|
||||
// clean up directory
|
||||
FileUtils.cleanDirectory(localGitRepo);
|
||||
} else {
|
||||
throw new IOException("File exists: " + localDir);
|
||||
}
|
||||
} else {
|
||||
localGitRepo.mkdirs();
|
||||
}
|
||||
|
||||
Git g = Git.cloneRepository().setURI(remoteUri).setDirectory(localGitRepo).call();
|
||||
g.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Crawlling the project page to get list of repositories, only works for Gitorious
|
||||
* @param projectUrl the project url e.g. https://git.example.com/project
|
||||
* @return List of path of repositories e.g. project/repo
|
||||
* @throws IOException
|
||||
*/
|
||||
public static List<String> getRepoListFromProject(String projectUrl) throws IOException {
|
||||
|
||||
List<String> repoList = new LinkedList<>();
|
||||
Document doc = Jsoup.connect(projectUrl).get();
|
||||
Elements repos = doc.getElementsByClass("repository");
|
||||
|
||||
for (Element e : repos) {
|
||||
String repo = e.children().first().text();
|
||||
repoList.add(repo.trim());
|
||||
}
|
||||
|
||||
return repoList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch all commit metadata from the repo
|
||||
* @param repoDir repository directory
|
||||
* @return list of commit metadata
|
||||
* @throws IOException
|
||||
* @throws GitAPIException
|
||||
*/
|
||||
public static List<CommitMetadata> getRepoMetadata(String repoDir) throws IOException, GitAPIException {
|
||||
|
||||
List<CommitMetadata> metadataList = new ArrayList<>();
|
||||
|
||||
FileRepositoryBuilder builder = new FileRepositoryBuilder();
|
||||
Repository repository = builder.setGitDir(new File(repoDir, ".git")).readEnvironment().findGitDir().build();
|
||||
|
||||
// Current branch may not be master. Instead of hard coding determine the current branch
|
||||
String currentBranch = repository.getBranch();
|
||||
Ref head = repository.getRef("refs/heads/" + currentBranch); // current branch may not be "master"
|
||||
if (head == null) {
|
||||
return metadataList;
|
||||
}
|
||||
|
||||
Git git = new Git(repository);
|
||||
|
||||
RevWalk walk = new RevWalk(repository);
|
||||
RevCommit commit = walk.parseCommit(head.getObjectId());
|
||||
|
||||
TreeWalk treeWalk = new TreeWalk(repository);
|
||||
treeWalk.addTree(commit.getTree());
|
||||
treeWalk.setRecursive(true);
|
||||
while (treeWalk.next()) {
|
||||
String filePath = treeWalk.getPathString();
|
||||
Iterable<RevCommit> commitLog = git.log().add(repository.resolve(Constants.HEAD)).addPath(filePath).call();
|
||||
for (RevCommit r : commitLog) {
|
||||
CommitMetadata metadata = new CommitMetadata(r.getName());
|
||||
metadata.setFilePath(filePath);
|
||||
metadata.setMessage(r.getShortMessage().trim());
|
||||
// Difference between committer and author
|
||||
// refer to: http://git-scm.com/book/ch2-3.html
|
||||
PersonIdent committer = r.getCommitterIdent();
|
||||
PersonIdent author = r.getAuthorIdent();
|
||||
metadata.setAuthor(author.getName());
|
||||
metadata.setAuthorEmail(author.getEmailAddress());
|
||||
metadata.setCommitter(committer.getName());
|
||||
metadata.setCommitterEmail(committer.getEmailAddress());
|
||||
metadata.setCommitTime(committer.getWhen());
|
||||
metadataList.add(metadata);
|
||||
}
|
||||
}
|
||||
git.close();
|
||||
return metadataList;
|
||||
}
|
||||
|
||||
public static String getHttpsUrl(String host, String path) {
|
||||
return HTTPS_PROTOCAL + "://" + host + "/" + path;
|
||||
}
|
||||
|
||||
public static String getGitUrl(String host, String path) {
|
||||
return GIT_PROTOCAL + "://" + host + "/" + path + GIT_SUBFIX;
|
||||
}
|
||||
|
||||
public static String getSshUrl(String host, String path) {
|
||||
return GIT_PROTOCAL + "@" + host + ":" + path;
|
||||
}
|
||||
|
||||
public static class CommitMetadata {
|
||||
String commitId;
|
||||
String author;
|
||||
String committer;
|
||||
Date commitTime;
|
||||
String message;
|
||||
String committerEmail;
|
||||
String authorEmail;
|
||||
String filePath;
|
||||
|
||||
public CommitMetadata() {
|
||||
}
|
||||
|
||||
public CommitMetadata(String commitId) {
|
||||
this.commitId = commitId;
|
||||
}
|
||||
|
||||
public CommitMetadata(String commitId, String author, String committer, Date commitTime, String message,
|
||||
String committerEmail, String authorEmail, String filePath) {
|
||||
this.commitId = commitId;
|
||||
this.author = author;
|
||||
this.committer = committer;
|
||||
this.commitTime = commitTime;
|
||||
this.message = message;
|
||||
this.committerEmail = committerEmail;
|
||||
this.authorEmail = authorEmail;
|
||||
this.filePath = filePath;
|
||||
}
|
||||
|
||||
public String getCommitId() {
|
||||
return commitId;
|
||||
}
|
||||
|
||||
public void setCommitId(String commitId) {
|
||||
this.commitId = commitId;
|
||||
}
|
||||
|
||||
public String getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
public void setAuthor(String author) {
|
||||
this.author = author;
|
||||
}
|
||||
|
||||
public String getCommitter() {
|
||||
return committer;
|
||||
}
|
||||
|
||||
public void setCommitter(String committer) {
|
||||
this.committer = committer;
|
||||
}
|
||||
|
||||
public Date getCommitTime() {
|
||||
return commitTime;
|
||||
}
|
||||
|
||||
public void setCommitTime(Date commitTime) {
|
||||
this.commitTime = commitTime;
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return message;
|
||||
}
|
||||
|
||||
public void setMessage(String message) {
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
public String getCommitterEmail() {
|
||||
return committerEmail;
|
||||
}
|
||||
|
||||
public void setCommitterEmail(String committerEmail) {
|
||||
this.committerEmail = committerEmail;
|
||||
}
|
||||
|
||||
public String getAuthorEmail() {
|
||||
return authorEmail;
|
||||
}
|
||||
|
||||
public void setAuthorEmail(String authorEmail) {
|
||||
this.authorEmail = authorEmail;
|
||||
}
|
||||
|
||||
public String getFilePath() {
|
||||
return filePath;
|
||||
}
|
||||
|
||||
public void setFilePath(String filePath) {
|
||||
this.filePath = filePath;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Copyright 2015 LinkedIn Corp. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
*/
|
||||
package wherehows.common.utils;
|
||||
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
/**
|
||||
* Created by zechen on 12/8/15.
|
||||
*/
|
||||
public class GitUtilTest {
|
||||
|
||||
@Test
|
||||
public void testClone()
|
||||
throws Exception {
|
||||
//GitUtil.clone("git://git.example.com/project/repo.git", "/tmp/project/repo");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetRepoListFromProject()
|
||||
throws Exception {
|
||||
//List<String> repos = GitUtil.getRepoListFromProject("git://git.example.com/project");
|
||||
//Assert.assertTrue(repos.size() > 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetRepoMetadata()
|
||||
throws Exception {
|
||||
//GitUtil.getRepoMetadata("/tmp/project/repo");
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user