mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-11 08:52:58 +00:00
Add a few new HDFS directory patterns
This commit is contained in:
parent
1cd5872369
commit
53d40c8392
@ -126,10 +126,10 @@ public class SchemaFetch {
|
|||||||
if (!fstat.isDirectory()) {
|
if (!fstat.isDirectory()) {
|
||||||
// file
|
// file
|
||||||
fileCount++;
|
fileCount++;
|
||||||
} else if (objName.matches("(_|\\.|tmp|temp|_distcp|\\*|test).*")) {
|
} else if (objName.matches("(_|\\.|tmp|temp|_distcp|backup|\\*|test|trash).*")) {
|
||||||
// hidden/temporary fs object
|
// hidden/temporary fs object
|
||||||
hiddenFileCount++;
|
hiddenFileCount++;
|
||||||
} else if (objName.matches("daily|hourly|monthly|weekly|year=[0-9]+|month=[0-9]+|country=.*")) {
|
} else if (objName.matches("daily|hourly|hourly.deduped|monthly|weekly|(ds|dt|datepartition|year|month|date)=[0-9-]+")) {
|
||||||
// temporal partition type
|
// temporal partition type
|
||||||
datePartitionCount++;
|
datePartitionCount++;
|
||||||
} else if (objName.matches(
|
} else if (objName.matches(
|
||||||
@ -167,8 +167,8 @@ public class SchemaFetch {
|
|||||||
throws IOException, InterruptedException, SQLException {
|
throws IOException, InterruptedException, SQLException {
|
||||||
String curPath = path.toUri().getPath();
|
String curPath = path.toUri().getPath();
|
||||||
Path n = path;
|
Path n = path;
|
||||||
//if (path.getName().matches("^(\\.|_|tmp|temp|test|\\*|archive|ARCHIVE|storkinternal).*"))
|
if (path.getName().matches("^(\\.|_|tmp|temp|test|trash|backup|archive|ARCHIVE|storkinternal).*"))
|
||||||
// return;
|
return;
|
||||||
|
|
||||||
logger.info(" -- scanPath(" + curPath + ")\n");
|
logger.info(" -- scanPath(" + curPath + ")\n");
|
||||||
int x = isTable(path, scanFs);
|
int x = isTable(path, scanFs);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user