mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-07 06:13:40 +00:00
fix(ingest): profiling - disable expensive profilers by default (#3759)
This commit is contained in:
parent
926b6eed4e
commit
599edd22ae
@ -70,7 +70,7 @@ sink:
|
|||||||
Note that a `.` is used to denote nested fields in the YAML recipe.
|
Note that a `.` is used to denote nested fields in the YAML recipe.
|
||||||
|
|
||||||
| Field | Required | Default | Description |
|
| Field | Required | Default | Description |
|
||||||
| --------------------------------------------------- | -------- | --------------------------- | ------------------------------------------------------------------------------------ |
|
| --------------------------------------------------- | -------- |----------------------| ------------------------------------------------------------------------------------ |
|
||||||
| `profiling.enabled` | | `False` | Whether profiling should be done. |
|
| `profiling.enabled` | | `False` | Whether profiling should be done. |
|
||||||
| `profiling.limit` | | | Max number of documents to profile. By default, profiles all documents. |
|
| `profiling.limit` | | | Max number of documents to profile. By default, profiles all documents. |
|
||||||
| `profiling.offset` | | | Offset in documents to profile. By default, uses no offset. |
|
| `profiling.offset` | | | Offset in documents to profile. By default, uses no offset. |
|
||||||
@ -88,11 +88,10 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
|
|||||||
| `profiling.include_field_mean_value` | | `True` | Whether to profile for the mean value of numeric columns. |
|
| `profiling.include_field_mean_value` | | `True` | Whether to profile for the mean value of numeric columns. |
|
||||||
| `profiling.include_field_median_value` | | `True` | Whether to profile for the median value of numeric columns. |
|
| `profiling.include_field_median_value` | | `True` | Whether to profile for the median value of numeric columns. |
|
||||||
| `profiling.include_field_stddev_value` | | `True` | Whether to profile for the standard deviation of numeric columns. |
|
| `profiling.include_field_stddev_value` | | `True` | Whether to profile for the standard deviation of numeric columns. |
|
||||||
| `profiling.include_field_quantiles` | | `True` | Whether to profile for the quantiles of numeric columns. |
|
| `profiling.include_field_quantiles` | | `False` | Whether to profile for the quantiles of numeric columns. |
|
||||||
| `profiling.include_field_distinct_value_frequencies` | | `True` | Whether to profile for distinct value frequencies. |
|
| `profiling.include_field_distinct_value_frequencies` | | `False` | Whether to profile for distinct value frequencies. |
|
||||||
| `profiling.include_field_histogram` | | `True` | Whether to profile for the histogram for numeric fields. |
|
| `profiling.include_field_histogram` | | `False` | Whether to profile for the histogram for numeric fields. |
|
||||||
| `profiling.include_field_sample_values` | | `True` | Whether to profile for the sample values for all columns. |
|
| `profiling.include_field_sample_values` | | `True` | Whether to profile for the sample values for all columns. |
|
||||||
|
|
||||||
## Compatibility
|
## Compatibility
|
||||||
|
|
||||||
Coming soon!
|
Coming soon!
|
||||||
|
|||||||
@ -112,9 +112,9 @@ class GEProfilingConfig(ConfigModel):
|
|||||||
include_field_mean_value: bool = True
|
include_field_mean_value: bool = True
|
||||||
include_field_median_value: bool = True
|
include_field_median_value: bool = True
|
||||||
include_field_stddev_value: bool = True
|
include_field_stddev_value: bool = True
|
||||||
include_field_quantiles: bool = True
|
include_field_quantiles: bool = False
|
||||||
include_field_distinct_value_frequencies: bool = True
|
include_field_distinct_value_frequencies: bool = False
|
||||||
include_field_histogram: bool = True
|
include_field_histogram: bool = False
|
||||||
include_field_sample_values: bool = True
|
include_field_sample_values: bool = True
|
||||||
|
|
||||||
allow_deny_patterns: AllowDenyPattern = AllowDenyPattern.allow_all()
|
allow_deny_patterns: AllowDenyPattern = AllowDenyPattern.allow_all()
|
||||||
|
|||||||
@ -20,7 +20,16 @@ source:
|
|||||||
- "^test_cases"
|
- "^test_cases"
|
||||||
profiling:
|
profiling:
|
||||||
enabled: True
|
enabled: True
|
||||||
|
include_field_null_count: true
|
||||||
|
include_field_min_value: true
|
||||||
|
include_field_max_value: true
|
||||||
|
include_field_mean_value: true
|
||||||
|
include_field_median_value: true
|
||||||
|
include_field_stddev_value: true
|
||||||
|
include_field_quantiles: true
|
||||||
|
include_field_distinct_value_frequencies: true
|
||||||
|
include_field_histogram: true
|
||||||
|
include_field_sample_values: true
|
||||||
sink:
|
sink:
|
||||||
type: file
|
type: file
|
||||||
config:
|
config:
|
||||||
|
|||||||
@ -20,6 +20,16 @@ source:
|
|||||||
- "library_catalog.librarydb.*"
|
- "library_catalog.librarydb.*"
|
||||||
profiling:
|
profiling:
|
||||||
enabled: True
|
enabled: True
|
||||||
|
include_field_null_count: true
|
||||||
|
include_field_min_value: true
|
||||||
|
include_field_max_value: true
|
||||||
|
include_field_mean_value: true
|
||||||
|
include_field_median_value: true
|
||||||
|
include_field_stddev_value: true
|
||||||
|
include_field_quantiles: true
|
||||||
|
include_field_distinct_value_frequencies: true
|
||||||
|
include_field_histogram: true
|
||||||
|
include_field_sample_values: true
|
||||||
|
|
||||||
sink:
|
sink:
|
||||||
type: file
|
type: file
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user