---
# DataHub ingestion recipe: reads metadata from a BigQuery project (source)
# and sends it to a DataHub instance over REST (sink).
# Lines starting with a single "#" followed by a key are optional settings;
# uncomment them (keeping the indentation) to enable them.
#
# see https://docs.datahub.com/docs/generated/ingestion/sources/bigquery for complete documentation
source:
  type: "bigquery"
  config:
    ## Coordinates
    project_id: project-id-1234567

    ## Credentials
    ## If GOOGLE_APPLICATION_CREDENTIALS environment variable is not set you can specify credentials here
    #credential:
    #  project_id: project-id-1234567
    #  private_key_id: "d0121d0000882411234e11166c6aaa23ed5d74e0"
    #  private_key: "-----BEGIN PRIVATE KEY-----\nMIIyourkey\n-----END PRIVATE KEY-----\n"
    #  client_email: "test@suppproject-id-1234567.iam.gserviceaccount.com"
    #  client_id: "123456678890"

    #include_tables: true
    #include_views: true
    #include_table_lineage: true
    #extract_policy_tags_from_catalog: true
    #start_time: 2021-12-15T20:08:23.091Z
    #end_time: 2023-12-15T20:08:23.091Z

    #profiling:
    #  enabled: true
    #  turn_off_expensive_profiling_metrics: false
    #  query_combiner_enabled: true
    #  max_number_of_fields_to_profile: 8
    #  profile_table_level_only: false
    #  include_field_null_count: true
    #  include_field_min_value: true
    #  include_field_max_value: true
    #  include_field_mean_value: true
    #  include_field_median_value: true
    #  include_field_stddev_value: false
    #  include_field_quantiles: false
    #  include_field_distinct_value_frequencies: false
    #  include_field_histogram: false
    #  include_field_sample_values: false

    #profile_pattern:
    #  allow:
    #    - "schema.table.column"
    #  deny:
    #    - "*.*.*"

    #storage_project_id: project-id-1234567

    ## Lineage with GCS Source
    ## Each "include" below is an alternative example of a path spec; keep only the ones you need.
    # include_column_lineage_with_gcs: true  # set to true or false
    # gcs_lineage_config:
    #   path_specs:
    #     - include: "gs://my-bucket/foo/tests/bar.avro"
    #     - include: "gs://my-bucket/foo/tests/*.*"
    #     - include: "gs://my-bucket/foo/tests/{table}/*.avro"
    #     - include: "gs://my-bucket/foo/tests/{table}/*/*.avro"
    #     - include: "gs://my-bucket/foo/tests/{table}/*.*"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/*.avro"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/{partition_key[0]}={partition[0]}/{partition_key[1]}={partition[1]}/*.avro"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.avro"
    #     - include: "gs://my-bucket/{dept}/tests/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #     - include: "gs://my-bucket/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #     - include: "gs://my-bucket/*/*/{table}/{partition[0]}/{partition[1]}/{partition[2]}/*.*"
    #   strip_urls: false

## see https://docs.datahub.com/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
  type: "datahub-rest"
  config:
    server: "http://localhost:8080"