fix(cli): fix example data contract yaml + update airflow codecov (#9707)

This commit is contained in:
Harshal Sheth 2024-01-26 14:02:52 -08:00 committed by GitHub
parent 388b3ec0ac
commit 5adb799f13
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 39 additions and 20 deletions

View File

@ -87,8 +87,8 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
directory: .
fail_ci_if_error: false
flags: airflow-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
name: pytest-airflow
flags: airflow,airflow-${{ matrix.extra_pip_extras }}
name: pytest-airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_requirements }}
verbose: true
event-file:

View File

@ -108,7 +108,7 @@ task testQuick(type: Exec, dependsOn: installDevTest) {
inputs.files(project.fileTree(dir: "src/", include: "**/*.py"))
inputs.files(project.fileTree(dir: "tests/"))
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
"source ${venv_name}/bin/activate && pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml"
}

View File

@ -1,6 +1,17 @@
import pathlib
import site
def pytest_addoption(parser):
    """Register this test suite's custom command-line flag with pytest.

    Adds a single boolean switch, ``--update-golden-files``. How the tests
    consume the flag is not visible from this module.

    Args:
        parser: The option parser handed to the ``pytest_addoption`` hook;
            only its ``addoption`` method is called.
    """
    # store_true + default=False: the option is False unless the flag is passed.
    parser.addoption(
        "--update-golden-files",
        action="store_true",
        default=False,
    )
# Sub-process coverage measurement. Per the coverage.py guide linked below, a
# .pth file in site-packages that runs coverage.process_startup() is one of the
# supported ways to start measurement in every newly launched interpreter:
# https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement
coverage_startup_code = "import coverage; coverage.process_startup()"
# Only the first directory reported by site.getsitepackages() is used.
site_packages_dir = pathlib.Path(site.getsitepackages()[0])
pth_file_path = site_packages_dir.joinpath("datahub_coverage_startup.pth")
# Executed when this module is imported; replaces any existing file of that name.
pth_file_path.write_text(coverage_startup_code)

View File

@ -1,21 +1,29 @@
# id: pet_details_dc # Optional: This is the unique identifier for the data contract
display_name: Data Contract for SampleHiveDataset
version: 1 # datahub yaml format version
# Note: this data contract yaml format is still in development, and will likely
# change in backwards-incompatible ways in the future.
entity: urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)
freshness:
time: 0700
granularity: DAILY
type: cron
cron: 0 7 * * * # 7am daily
timezone: America/Los_Angeles
schema:
properties:
field_foo:
type: string
native_type: VARCHAR(100)
field_bar:
type: boolean
required:
- field_bar
type: json-schema
json-schema:
properties:
field_foo:
type: string
native_type: VARCHAR(100)
field_bar:
type: boolean
required:
- field_bar
data_quality:
- type: column_range
config:
column: field_foo
min: 0
max: 100
- type: unique
column: field_foo
- type: custom_sql
sql: SELECT COUNT(*) FROM SampleHiveDataset
operator:
type: greater_than
value: 100