Add allow/deny patterns to SQL config

This commit is contained in:
Harshal Sheth 2021-02-15 17:53:29 -08:00 committed by Shirshanka Das
parent 62bb7f012f
commit 4b83fc6591
7 changed files with 78 additions and 18 deletions

View File

@ -10,7 +10,10 @@
# Run tests
- pip install -r test_requirements.txt
- pytest
# Run Unit tests
- pytest tests/unit
# Run Integration tests
- pytest tests/integration
# Sanity-check code before check-in (currently broken)
- flake8 src test && mypy -p gometa && black --check -l 120 src test && isort --check-only src test && pytest

View File

@ -0,0 +1,10 @@
---
# Ingestion config: reads from a source of type "mssql" and emits to a sink of type "console".
# NOTE(review): credentials are stored in plain text here — presumably test-only; confirm.
source:
type: mssql
mssql:
username: sa
password: test!Password
database: DemoData
sink:
type: console

View File

@ -7,4 +7,6 @@ source:
database: DemoData
sink:
type: console
type: "datahub-rest"
datahub-rest:
server: 'http://localhost:8080'

View File

@ -1,7 +1,8 @@
from abc import ABC, abstractmethod
from typing import TypeVar, Type
from typing import TypeVar, Type, List
from pydantic import BaseModel, ValidationError
from pathlib import Path
import re
class ConfigModel(BaseModel):
@ -29,6 +30,27 @@ class ConfigurationMechanism(ABC):
def load_config(self, cls: Type[T], config_file: Path) -> T:
pass
class AllowDenyPattern(BaseModel):
    """Allow/deny regex filter for string identifiers.

    A string is accepted when no ``deny`` pattern matches it and at least one
    ``allow`` pattern does. Deny patterns are checked first, so they take
    precedence over allow patterns.

    Patterns are applied with ``re.match``, which anchors at the start of the
    string only: a pattern such as ``foo.mytable`` also accepts
    ``foo.mytable_backup``. Append ``$`` to a pattern to require a full match.
    """

    # Regexes of which at least one must match for a string to be accepted.
    # The default accepts everything.
    allow: List[str] = [".*"]
    # Regexes that reject a string outright, regardless of the allow list.
    deny: List[str] = []

    @classmethod
    def allow_all(cls) -> "AllowDenyPattern":
        """Return a pattern built from the field defaults, which accepts every string."""
        # Instantiate through ``cls`` so that subclasses get an instance of
        # their own type rather than of the base class.
        return cls()

    def allowed(self, string: str) -> bool:
        """Return True if ``string`` matches no deny pattern and at least one allow pattern."""
        # Deny rules win: a single matching deny pattern rejects the string.
        if any(re.match(deny_pattern, string) for deny_pattern in self.deny):
            return False
        # Otherwise accept only if some allow pattern matches; an empty allow
        # list therefore rejects everything.
        return any(re.match(allow_pattern, string) for allow_pattern in self.allow)
class DynamicFactory:
def __init__(self):

View File

@ -7,10 +7,11 @@ from gometa.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata, MyS
from gometa.metadata.com.linkedin.pegasus2avro.common import AuditStamp
from gometa.ingestion.api.source import WorkUnit
from gometa.configuration.common import AllowDenyPattern
from pydantic import BaseModel
import logging
import time
from typing import Optional
from typing import Optional, List
from dataclasses import dataclass
logger = logging.getLogger(__name__)
@ -23,6 +24,7 @@ class SQLAlchemyConfig(BaseModel):
database: str = ""
scheme: str
options: Optional[dict] = {}
table_pattern: AllowDenyPattern = AllowDenyPattern.allow_all()
def get_sql_alchemy_url(self):
url=f'{self.scheme}://{self.username}:{self.password}@{self.host_port}/{self.database}'
@ -64,6 +66,7 @@ def get_schema_metadata(dataset_name, platform, columns) -> SchemaMetadata:
def get_column_type(column_type):
"""
Maps SQLAlchemy types (https://docs.sqlalchemy.org/en/13/core/type_basics.html) to corresponding schema types
@ -95,20 +98,23 @@ def get_sql_workunits(sql_config:SQLAlchemyConfig, platform: str, env: str = "PR
database = sql_config.database
for schema in inspector.get_schema_names():
for table in inspector.get_table_names(schema):
columns = inspector.get_columns(table, schema)
mce = MetadataChangeEvent()
if database != "":
dataset_name = f'{database}.{schema}.{table}'
else:
dataset_name = f'{schema}.{table}'
if sql_config.table_pattern.allowed(f'{schema}.{table}'):
columns = inspector.get_columns(table, schema)
mce = MetadataChangeEvent()
if database != "":
dataset_name = f'{database}.{schema}.{table}'
else:
dataset_name = f'{schema}.{table}'
dataset_snapshot = DatasetSnapshot()
dataset_snapshot.urn=(
f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{env})"
)
schema_metadata = get_schema_metadata(dataset_name, platform, columns)
dataset_snapshot.aspects.append(schema_metadata)
mce.proposedSnapshot = dataset_snapshot
yield SqlWorkUnit(id=dataset_name, mce = mce)
dataset_snapshot = DatasetSnapshot()
dataset_snapshot.urn=(
f"urn:li:dataset:(urn:li:dataPlatform:{platform},{dataset_name},{env})"
)
schema_metadata = get_schema_metadata(dataset_name, platform, columns)
dataset_snapshot.aspects.append(schema_metadata)
mce.proposedSnapshot = dataset_snapshot
yield SqlWorkUnit(id=dataset_name, mce = mce)
else:
logger.debug(f"Found table: {schema}.{table}, but skipping due to allow-deny patterns")

View File

@ -10,6 +10,8 @@ def test_ingest(sql_server, pytestconfig):
ret = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
assert ret.returncode == 0
config_file=os.path.join(str(pytestconfig.rootdir), "tests/integration/sql_server", "mssql_to_console.yml")
# delete the output directory. TODO: move to a better way to create an output test fixture
os.system("rm -rf output")
ingest_command=f'gometa-ingest -c {config_file}'
ret = os.system(ingest_command)
assert ret == 0

View File

@ -0,0 +1,15 @@
from gometa.configuration.common import AllowDenyPattern
def test_allow_all():
    """The default allow-all pattern accepts an arbitrary schema-qualified table name."""
    pattern = AllowDenyPattern.allow_all()
    assert pattern.allowed("foo.table")
def test_deny_all():
    """A catch-all deny pattern combined with an empty allow list rejects the table."""
    pattern = AllowDenyPattern(allow=[], deny=[".*"])
    assert not pattern.allowed("foo.table")
def test_single_table():
    """An allow list naming one table accepts that table and rejects an unrelated one."""
    pattern = AllowDenyPattern(allow=["foo.mytable"])
    assert pattern.allowed("foo.mytable")
    # Negative case: a table in another schema matches no allow pattern.
    assert not pattern.allowed("bar.mytable")