# datahub/metadata-ingestion/tests/unit/test_classification.py

from datahub.ingestion.glossary.classifier import ClassificationConfig
from datahub.ingestion.glossary.datahub_classifier import DataHubClassifier
def test_default_classification_config():
    """Check the first classifier of a ClassificationConfig built with enabled=True.

    With no classifiers passed explicitly, the first entry of ``classifiers``
    must have type "datahub" and a ``config`` of None.
    """
    first_classifier = ClassificationConfig(enabled=True).classifiers[0]

    assert first_classifier.type == "datahub"
    assert first_classifier.config is None
def test_default_datahub_classifier_config():
    """Check a DataHubClassifier created with ``config_dict=None``.

    The classifier's ``config.info_types_config`` must not be None.
    """
    classifier = DataHubClassifier.create(config_dict=None)
    info_types = classifier.config.info_types_config

    assert info_types is not None
def test_selective_datahub_classifier_config_override():
    """Check DataHubClassifier configs created from partial override dicts.

    Two overrides are exercised:

    * only ``confidence_level_threshold`` is supplied -- ``info_types_config``
      must still be non-None;
    * ``confidence_level_threshold`` plus the ``Prediction_Factors_and_Weights``
      of the "Age" info type are supplied -- the supplied ``Name`` and
      ``Values`` weights must be readable back from the config, while the
      ``Name`` / ``Values`` attributes of "Age" and the "Email_Address" info
      type (none of which appear in the override dict) must not be None.
    """
    # Case 1: override a single top-level setting only.
    threshold_only = {"confidence_level_threshold": 0.7}
    threshold_only_config = DataHubClassifier.create(
        config_dict=threshold_only
    ).config
    assert threshold_only_config.info_types_config is not None

    # Case 2: override the threshold plus the factor weights of one info type.
    age_weights = {
        "Name": 0.5,
        "Description": 0,
        "Datatype": 0,
        "Values": 0.5,
    }
    threshold_and_age = {
        "confidence_level_threshold": 0.7,
        "info_types_config": {
            "Age": {
                "Prediction_Factors_and_Weights": age_weights,
            },
        },
    }
    age_override_config = DataHubClassifier.create(
        config_dict=threshold_and_age
    ).config

    info_types = age_override_config.info_types_config
    assert info_types is not None

    # The explicitly supplied weights must be reflected as given.
    age = info_types["Age"]
    assert age.Prediction_Factors_and_Weights.Name == 0.5
    assert age.Prediction_Factors_and_Weights.Values == 0.5

    # Parts that the override dict did not mention must still be present.
    assert age.Name is not None
    assert age.Values is not None
    assert info_types["Email_Address"] is not None