# mirror of
# https://github.com/datahub-project/datahub.git
# synced 2025-07-05 16:22:17 +00:00
# 67 lines
# 2.8 KiB
# YAML
# 67 lines
# 2.8 KiB
# YAML
---
# Example DataHub ingestion recipe: reads metadata from a BigQuery source and
# sends it to a DataHub REST sink.
#
# Lines starting with "# " below the active keys are commented-out example
# settings; remove the leading "# " (keeping the indentation) to enable one.

# see https://docs.datahub.com/docs/generated/ingestion/sources/bigquery for complete documentation
source:
  type: "bigquery"
  config:
    ## Coordinates
    project_id: project-id-1234567

    ## Credentials
    ## If GOOGLE_APPLICATION_CREDENTIALS environment variable is not set you can specify credentials here
    # credential:
    #   project_id: project-id-1234567
    #   private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
    #   private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
    #   client_email: "test@project-id-1234567.iam.gserviceaccount.com"
    #   client_id: "123456678890"

    # include_tables: true
    # include_views: true
    # include_table_lineage: true
    # extract_policy_tags_from_catalog: true
    # start_time: "2021-12-15T20:08:23.091Z"
    # end_time: "2023-12-15T20:08:23.091Z"
    # profiling:
    #   enabled: true
    #   turn_off_expensive_profiling_metrics: false
    #   query_combiner_enabled: true
    #   max_number_of_fields_to_profile: 8
    #   profile_table_level_only: false
    #   include_field_null_count: true
    #   include_field_min_value: true
    #   include_field_max_value: true
    #   include_field_mean_value: true
    #   include_field_median_value: true
    #   include_field_stddev_value: false
    #   include_field_quantiles: false
    #   include_field_distinct_value_frequencies: false
    #   include_field_histogram: false
    #   include_field_sample_values: false
    # profile_pattern:
    #   allow:
    #     - "schema.table.column"
    #   deny:
    #     - "*.*.*"
    # storage_project_id: project-id-1234567

    ## Lineage with GCS Source
    # include_column_lineage_with_gcs: true  # or false
    # gcs_lineage_config:
    #   path_specs:
    #     - include: "gs://my-bucket/foo/tests/bar.avro"
    #     - include: "gs://my-bucket/foo/tests/*.*"
    #     - include: "gs://my-bucket/foo/tests/{table}/*.avro"
    #     - include: "gs://my-bucket/foo/tests/{table}/*/*.avro"
    #     - include: "gs://my-bucket/foo/tests/{table}/*.*"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/*.avro"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.avro"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.avro"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #     - include: "gs://my-bucket/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #     - include: "gs://my-bucket/*/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #   strip_urls: false

## see https://docs.datahub.com/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
  type: "datahub-rest"
  config:
    server: "http://localhost:8080"