fix(ingest): profiling - disable expensive profilers by default (#3759)

This commit is contained in:
Tamas Nemeth 2021-12-18 02:17:25 +01:00 committed by GitHub
parent 926b6eed4e
commit 599edd22ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 45 additions and 27 deletions

View File

@@ -70,7 +70,7 @@ sink:
Note that a `.` is used to denote nested fields in the YAML recipe. Note that a `.` is used to denote nested fields in the YAML recipe.
| Field | Required | Default | Description | | Field | Required | Default | Description |
| --------------------------------------------------- | -------- | --------------------------- | ------------------------------------------------------------------------------------ | | --------------------------------------------------- | -------- |----------------------| ------------------------------------------------------------------------------------ |
| `profiling.enabled` | | `False` | Whether profiling should be done. | | `profiling.enabled` | | `False` | Whether profiling should be done. |
| `profiling.limit` | | | Max number of documents to profile. By default, profiles all documents. | | `profiling.limit` | | | Max number of documents to profile. By default, profiles all documents. |
| `profiling.offset` | | | Offset in documents to profile. By default, uses no offset. | | `profiling.offset` | | | Offset in documents to profile. By default, uses no offset. |
@@ -88,11 +88,10 @@ Note that a `.` is used to denote nested fields in the YAML recipe.
| `profiling.include_field_mean_value` | | `True` | Whether to profile for the mean value of numeric columns. | | `profiling.include_field_mean_value` | | `True` | Whether to profile for the mean value of numeric columns. |
| `profiling.include_field_median_value` | | `True` | Whether to profile for the median value of numeric columns. | | `profiling.include_field_median_value` | | `True` | Whether to profile for the median value of numeric columns. |
| `profiling.include_field_stddev_value` | | `True` | Whether to profile for the standard deviation of numeric columns. | | `profiling.include_field_stddev_value` | | `True` | Whether to profile for the standard deviation of numeric columns. |
| `profiling.include_field_quantiles` | | `True` | Whether to profile for the quantiles of numeric columns. | | `profiling.include_field_quantiles` | | `False` | Whether to profile for the quantiles of numeric columns. |
| `profiling.include_field_distinct_value_frequencies` | | `True` | Whether to profile for distinct value frequencies. | | `profiling.include_field_distinct_value_frequencies` | | `False` | Whether to profile for distinct value frequencies. |
| `profiling.include_field_histogram` | | `True` | Whether to profile for the histogram for numeric fields. | | `profiling.include_field_histogram` | | `False` | Whether to profile for the histogram for numeric fields. |
| `profiling.include_field_sample_values` | | `True` | Whether to profile for the sample values for all columns. | | `profiling.include_field_sample_values` | | `True` | Whether to profile for the sample values for all columns. |
## Compatibility ## Compatibility
Coming soon! Coming soon!

View File

@@ -112,9 +112,9 @@ class GEProfilingConfig(ConfigModel):
include_field_mean_value: bool = True include_field_mean_value: bool = True
include_field_median_value: bool = True include_field_median_value: bool = True
include_field_stddev_value: bool = True include_field_stddev_value: bool = True
include_field_quantiles: bool = True include_field_quantiles: bool = False
include_field_distinct_value_frequencies: bool = True include_field_distinct_value_frequencies: bool = False
include_field_histogram: bool = True include_field_histogram: bool = False
include_field_sample_values: bool = True include_field_sample_values: bool = True
allow_deny_patterns: AllowDenyPattern = AllowDenyPattern.allow_all() allow_deny_patterns: AllowDenyPattern = AllowDenyPattern.allow_all()

View File

@@ -20,7 +20,16 @@ source:
- "^test_cases" - "^test_cases"
profiling: profiling:
enabled: True enabled: True
include_field_null_count: true
include_field_min_value: true
include_field_max_value: true
include_field_mean_value: true
include_field_median_value: true
include_field_stddev_value: true
include_field_quantiles: true
include_field_distinct_value_frequencies: true
include_field_histogram: true
include_field_sample_values: true
sink: sink:
type: file type: file
config: config:

View File

@@ -20,6 +20,16 @@ source:
- "library_catalog.librarydb.*" - "library_catalog.librarydb.*"
profiling: profiling:
enabled: True enabled: True
include_field_null_count: true
include_field_min_value: true
include_field_max_value: true
include_field_mean_value: true
include_field_median_value: true
include_field_stddev_value: true
include_field_quantiles: true
include_field_distinct_value_frequencies: true
include_field_histogram: true
include_field_sample_values: true
sink: sink:
type: file type: file