MINOR - Skip delta tests for 3.11 (#14398)

* MINOR - Bump delta for 3.11

* Update flags

* MINOR - Bump delta for 3.11

* Update tests regex

* Update version

* Deprecations

* Format

* Version

* Try delta spark

* Skip delta tests for 3.11

* Update ingestion/tests/unit/topology/pipeline/test_airflow.py
Pere Miquel Brull 2023-12-18 17:01:57 +01:00 committed by GitHub
parent 1af1caf065
commit a83a5ba3a3
27 changed files with 73 additions and 67 deletions


@@ -201,7 +201,7 @@ class DeltalakeSource(DatabaseServiceSource):
         :return: tables or views, depending on config
         """
         schema_name = self.context.database_schema
-        for table in self.spark.catalog.listTables(schema_name):
+        for table in self.spark.catalog.listTables(dbName=schema_name):
             try:
                 table_name = table.name
                 table_fqn = fqn.build(
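
The schema is now passed to listTables by keyword. A minimal sketch of the call pattern, separate from the diff — assuming a throwaway local SparkSession and an existing "default" schema, not the connector's actual setup:

from pyspark.sql import SparkSession

# Hedged sketch: a local session; schema and table names are illustrative.
spark = SparkSession.builder.appName("list-tables-demo").getOrCreate()

# Passing the schema by keyword keeps the call unambiguous on Spark
# versions where Catalog.listTables takes more than one parameter.
for table in spark.catalog.listTables(dbName="default"):
    print(table.name)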


@@ -64,6 +64,7 @@ and we'll treat this as independent sets of lineage
 """
 import json
 import logging
+import textwrap
 import traceback
 from collections import defaultdict
 from copy import deepcopy

@@ -200,7 +201,12 @@ def _parse_xlets(xlet: Any) -> None:
 @_parse_xlets.register
 @deprecated(
-    message="Please update your inlets/outlets to follow <TODO DOCS>",
+    message=textwrap.dedent(
+        """
+        Please update your inlets/outlets to follow
+        https://docs.open-metadata.org/connectors/pipeline/airflow/configuring-lineage
+        """
+    ),
     release="1.4.0",
 )
 def dictionary_lineage_annotation(xlet: dict) -> Dict[str, List[OMEntity]]:
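
textwrap.dedent strips the common leading whitespace, so the message can stay indented next to the decorator without that indentation leaking into the emitted warning. A standalone illustration, separate from the diff:

import textwrap

message = textwrap.dedent(
    """
    Please update your inlets/outlets to follow
    https://docs.open-metadata.org/connectors/pipeline/airflow/configuring-lineage
    """
)
# Only the intended text and newlines survive the dedent.
print(message)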


@@ -136,13 +136,13 @@ class OMetaDomainTest(TestCase):
         """
         res: Domain = self.metadata.create_or_update(data=self.create_domain)
-        self.assertEquals(res.name, self.create_domain.name)
-        self.assertEquals(res.description, self.create_domain.description)
+        self.assertEqual(res.name, self.create_domain.name)
+        self.assertEqual(res.description, self.create_domain.description)

         res: DataProduct = self.metadata.create_or_update(data=self.create_data_product)
-        self.assertEquals(res.name, self.create_data_product.name)
-        self.assertEquals(res.description, self.create_data_product.description)
-        self.assertEquals(res.domain.name, self.create_data_product.domain.__root__)
+        self.assertEqual(res.name, self.create_data_product.name)
+        self.assertEqual(res.description, self.create_data_product.description)
+        self.assertEqual(res.domain.name, self.create_data_product.domain.__root__)

     def test_get_name(self):
         """We can fetch Domains & Data Products by name"""

@@ -189,4 +189,4 @@ class OMetaDomainTest(TestCase):
             entity=Dashboard, fqn=self.dashboard.fullyQualifiedName, fields=["domain"]
         )
-        self.assertEquals(updated_dashboard.domain.name, domain.name.__root__)
+        self.assertEqual(updated_dashboard.domain.name, domain.name.__root__)
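
Most of this commit is this mechanical sweep: assertEquals is a deprecated alias of assertEqual, deprecated since Python 3.2 and removed in Python 3.12, so every call site moves to the canonical name. A minimal, self-contained illustration:

import unittest

class AliasDemo(unittest.TestCase):
    def test_equality(self):
        # assertEqual is the canonical method; the assertEquals alias was
        # deprecated in Python 3.2 and removed in Python 3.12.
        self.assertEqual(1 + 1, 2)

if __name__ == "__main__":
    unittest.main()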


@@ -285,7 +285,7 @@ class OMetaESTest(TestCase):
             '{"query": {"bool": {"must": [{"term": {"processedLineage": true}},'
             ' {"term": {"service.name.keyword": "my_service"}}]}}}'
         )
-        self.assertEquals(res, quote(expected))
+        self.assertEqual(res, quote(expected))

     def test_get_queries_with_lineage(self):
         """Check the payload from ES"""


@@ -225,7 +225,7 @@ class TestSuiteWorkflowTests(unittest.TestCase):
         )[0]

         self.assertIsNone(table.testSuite)
-        self.assertEquals(
+        self.assertEqual(
             table_and_tests.right.executable_test_suite.name.__root__,
             self.table.fullyQualifiedName.__root__ + ".testSuite",
         )


@@ -56,7 +56,7 @@ class TestAirflowLineageParser(TestCase):
         For this test, we will assume that by having the same FQN, the
         entity type will also be the same.
         """
-        self.assertEquals(len(first), len(second))
+        self.assertEqual(len(first), len(second))

         for xlet1 in first:
             match = False

@@ -305,7 +305,7 @@ class TestAirflowLineageParser(TestCase):
             fqn="FQN",
             key="test",
         )
-        self.assertEquals(
+        self.assertEqual(
             str(om_entity),
             '{"entity": "metadata.generated.schema.entity.data.table.Table", "fqn": "FQN", "key": "test"}',
         )

@@ -315,7 +315,7 @@ class TestAirflowLineageParser(TestCase):
             fqn="FQN",
             key="test",
         )
-        self.assertEquals(
+        self.assertEqual(
             str(om_entity),
             '{"entity": "metadata.generated.schema.entity.data.container.Container", "fqn": "FQN", "key": "test"}',
         )

@@ -332,7 +332,7 @@ class TestAirflowLineageParser(TestCase):
         """
         self.assertIsNone(_parse_xlets("random"))

-        self.assertEquals(
+        self.assertEqual(
             _parse_xlets(
                 '{"entity": "metadata.generated.schema.entity.data.table.Table", "fqn": "FQN", "key": "test"}'
             ),

@@ -347,7 +347,7 @@ class TestAirflowLineageParser(TestCase):
             },
         )

-        self.assertEquals(
+        self.assertEqual(
             _parse_xlets(
                 '{"entity": "metadata.generated.schema.entity.data.container.Container", "fqn": "FQN", "key": "test"}'
             ),

@@ -362,7 +362,7 @@ class TestAirflowLineageParser(TestCase):
             },
         )

-        self.assertEquals(
+        self.assertEqual(
             _parse_xlets(
                 '{"entity": "metadata.generated.schema.entity.data.dashboard.Dashboard", "fqn": "FQN", "key": "test"}'
             ),

@@ -385,7 +385,7 @@ class TestAirflowLineageParser(TestCase):
             key="test",
         )

-        self.assertEquals(
+        self.assertEqual(
             serialize(om_entity).get("__data__"),
             '{"entity": "metadata.generated.schema.entity.data.table.Table", "fqn": "FQN", "key": "test"}',
         )


@@ -45,8 +45,8 @@ class TestDataFrameReader(TestCase):
         self.assertIsNotNone(df_list)
         self.assertTrue(len(df_list))

-        self.assertEquals(df_list[0].shape, (5, 2))
-        self.assertEquals(
+        self.assertEqual(df_list[0].shape, (5, 2))
+        self.assertEqual(
             list(df_list[0].columns), ["transaction_id", "transaction_value"]
         )

@@ -62,8 +62,8 @@ class TestDataFrameReader(TestCase):
         self.assertIsNotNone(df_list)
         self.assertTrue(len(df_list))

-        self.assertEquals(df_list[0].shape, (5, 2))
-        self.assertEquals(
+        self.assertEqual(df_list[0].shape, (5, 2))
+        self.assertEqual(
             list(df_list[0].columns), ["transaction_id", "transaction_value"]
         )

@@ -81,8 +81,8 @@ class TestDataFrameReader(TestCase):
         self.assertIsNotNone(df_list)
         self.assertTrue(len(df_list))

-        self.assertEquals(df_list[0].shape, (5, 2))
-        self.assertEquals(
+        self.assertEqual(df_list[0].shape, (5, 2))
+        self.assertEqual(
             list(df_list[0].columns), ["transaction_id", "transaction_value"]
         )

@@ -98,8 +98,8 @@ class TestDataFrameReader(TestCase):
         self.assertIsNotNone(df_list)
         self.assertTrue(len(df_list))

-        self.assertEquals(df_list[0].shape, (4, 4))
-        self.assertEquals(
+        self.assertEqual(df_list[0].shape, (4, 4))
+        self.assertEqual(
             list(df_list[0].columns),
             ["name", "id", "version", "Company"],
         )

@@ -116,8 +116,8 @@ class TestDataFrameReader(TestCase):
         self.assertIsNotNone(df_list)
         self.assertTrue(len(df_list))

-        self.assertEquals(df_list[0].shape, (4, 8))
-        self.assertEquals(
+        self.assertEqual(df_list[0].shape, (4, 8))
+        self.assertEqual(
             list(df_list[0].columns),
             [
                 "Boolean",


@@ -250,7 +250,7 @@ class TestLkmlParser(TestCase):
             ),
         ]

-        self.assertEquals(cols, expected_cols)
+        self.assertEqual(cols, expected_cols)

     def test_view_col_parser(self):
         """

@@ -278,4 +278,4 @@ class TestLkmlParser(TestCase):
             ),
         ]

-        self.assertEquals(cols, expected_cols)
+        self.assertEqual(cols, expected_cols)


@@ -14,6 +14,8 @@ Test Deltalake using the topology

 Here we don't need to patch, as we can just create our own metastore
 """
 import shutil
+import sys
+import unittest
 from datetime import date, datetime
 from unittest import TestCase

@@ -100,6 +102,10 @@ MOCK_DATABASE_SCHEMA = DatabaseSchema(
 )

+@unittest.skipUnless(
+    sys.version_info < (3, 11),
+    reason="https://github.com/open-metadata/OpenMetadata/issues/14408",
+)
 class DeltaLakeUnitTest(TestCase):
     """
     Add method validations from Deltalake ingestion
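
unittest.skipUnless runs the decorated suite only while the condition holds, so the whole class is skipped on Python 3.11+ until the linked issue is resolved. A self-contained sketch of the same gating (class and test names are illustrative):

import sys
import unittest

@unittest.skipUnless(
    sys.version_info < (3, 11),
    reason="demo: suite only runs on interpreters below 3.11",
)
class VersionGatedTest(unittest.TestCase):
    def test_runs_only_below_3_11(self):
        self.assertLess(sys.version_info[:2], (3, 11))

if __name__ == "__main__":
    unittest.main()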


@@ -151,7 +151,7 @@ class TestAirflow(TestCase):
             owners=None,
         )

-        self.assertEquals(
+        self.assertEqual(
             dag.tasks[0].inlets,
             [
                 {

@@ -162,7 +162,7 @@ class TestAirflow(TestCase):
                 }
             ],
         )
-        self.assertEquals(
+        self.assertEqual(
             dag.tasks[1].outlets,
             [
                 {

@@ -178,10 +178,10 @@ class TestAirflow(TestCase):
         """
         pipeline_data = {"schedule_interval": None}
-        self.assertEquals(get_schedule_interval(pipeline_data), None)
+        self.assertIsNone(get_schedule_interval(pipeline_data))

         pipeline_data = {"schedule_interval": {"__var": 86400.0, "__type": "timedelta"}}
-        self.assertEquals(get_schedule_interval(pipeline_data), "1 day, 0:00:00")
+        self.assertEqual(get_schedule_interval(pipeline_data), "1 day, 0:00:00")

         pipeline_data = {
             "timetable": {

@@ -189,7 +189,7 @@ class TestAirflow(TestCase):
                 "__var": {},
             }
         }
-        self.assertEquals(get_schedule_interval(pipeline_data), "@once")
+        self.assertEqual(get_schedule_interval(pipeline_data), "@once")

         pipeline_data = {
             "timetable": {

@@ -197,4 +197,4 @@ class TestAirflow(TestCase):
                 "__var": {"expression": "*/2 * * * *", "timezone": "UTC"},
             }
         }
-        self.assertEquals(get_schedule_interval(pipeline_data), "*/2 * * * *")
+        self.assertEqual(get_schedule_interval(pipeline_data), "*/2 * * * *")
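
The expected "1 day, 0:00:00" string is just Python's str() of a timedelta; a quick check, independent of the Airflow helpers:

from datetime import timedelta

# str() of a one-day timedelta renders exactly as the test expects.
assert str(timedelta(seconds=86400.0)) == "1 day, 0:00:00"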


@@ -33,7 +33,7 @@ class TestDeprecationWarning(TestCase):
             self.deprecated_call()

         # Verify the result
-        self.assertEquals(len(warn), 1)
+        self.assertEqual(len(warn), 1)
         self.assertTrue(issubclass(warn[0].category, DeprecationWarning))
         self.assertTrue("This is a deprecation" in str(warn[0].message))
         self.assertTrue("x.y.z" in str(warn[0].message))


@@ -21,28 +21,28 @@ class StoredProceduresTests(TestCase):
     def test_get_procedure_name_from_call(self):
         """Check that we properly parse CALL queries"""
-        self.assertEquals(
+        self.assertEqual(
             get_procedure_name_from_call(
                 query_text="CALL db.schema.procedure_name(...)",
             ),
             "procedure_name",
         )

-        self.assertEquals(
+        self.assertEqual(
             get_procedure_name_from_call(
                 query_text="CALL schema.procedure_name(...)",
             ),
             "procedure_name",
         )

-        self.assertEquals(
+        self.assertEqual(
             get_procedure_name_from_call(
                 query_text="CALL procedure_name(...)",
             ),
             "procedure_name",
         )

-        self.assertEquals(
+        self.assertEqual(
             get_procedure_name_from_call(
                 query_text="CALL DB.SCHEMA.PROCEDURE_NAME(...)",
             ),
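
A minimal sketch of how this kind of CALL parsing can work — not the project's implementation, and the helper name below is hypothetical:

import re
from typing import Optional

def procedure_name_from_call(query_text: str) -> Optional[str]:
    # Take the dotted identifier after CALL and keep its last segment.
    match = re.match(r"\s*CALL\s+([\w.]+)\s*\(", query_text, flags=re.IGNORECASE)
    return match.group(1).split(".")[-1].lower() if match else None

assert procedure_name_from_call("CALL db.schema.procedure_name(...)") == "procedure_name"
assert procedure_name_from_call("CALL DB.SCHEMA.PROCEDURE_NAME(...)") == "procedure_name"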


@@ -163,14 +163,12 @@ class TestBaseWorkflow(TestCase):
     @pytest.mark.order(2)
     def test_workflow_status(self):
         # Everything is processed properly in the Source
-        self.assertEquals(
-            self.workflow.source.status.records, ["0", "1", "2", "3", "4"]
-        )
-        self.assertEquals(len(self.workflow.source.status.failures), 0)
+        self.assertEqual(self.workflow.source.status.records, ["0", "1", "2", "3", "4"])
+        self.assertEqual(len(self.workflow.source.status.failures), 0)

         # We catch one error in the Sink
-        self.assertEquals(len(self.workflow.steps[0].status.records), 4)
-        self.assertEquals(len(self.workflow.steps[0].status.failures), 1)
+        self.assertEqual(len(self.workflow.steps[0].status.records), 4)
+        self.assertEqual(len(self.workflow.steps[0].status.failures), 1)

     @pytest.mark.order(3)
     def test_workflow_raise_status(self):


@@ -40,9 +40,8 @@ Configure and schedule Deltalake metadata and profiler workflows from the OpenMe
 ## Requirements

 {%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%}
 To deploy OpenMetadata, check the Deployment guides.
 {%/inlineCallout%}

-Deltalake requires to run with Python 3.8, 3.9 or 3.10. We do not yet support the Delta connector
-for Python 3.11

 ## Metadata Ingestion


@@ -38,11 +38,8 @@ Configure and schedule Deltalake metadata and profiler workflows from the OpenMe
 ## Requirements

 {%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%}
 To deploy OpenMetadata, check the Deployment guides.
 {%/inlineCallout%}

-Deltalake requires to run with Python 3.8, 3.9 or 3.10. We do not yet support the Delta connector
-for Python 3.11

 ### Python Requirements


@@ -233,7 +233,7 @@ public abstract class EntityResourceTest<T extends EntityInterface, K extends Cr
   public static final String DATA_CONSUMER_ROLE_NAME = "DataConsumer";

   public static final String ENTITY_LINK_MATCH_ERROR =
-      "[entityLink must match \"^(?U)<#E::\\w+::[\\w'\\- .&/:+\"\\\\()$#%]+>$\"]";
+      "[entityLink must match \"(?U)^<#E::\\w+::[\\w'\\- .&/:+\"\\\\()$#%]+>$\"]";

   // Random unicode string generator to test entity name accepts all the unicode characters
   protected static final RandomStringGenerator RANDOM_STRING_GENERATOR =


@@ -199,7 +199,7 @@ public class FeedResourceTest extends OpenMetadataApplicationTest {
     // Create thread without addressed to entity in the request
     CreateThread create = create().withFrom(USER.getName()).withAbout("<>"); // Invalid EntityLink

-    String failureReason = "[about must match \"^(?U)<#E::\\w+::[\\w'\\- .&/:+\"\\\\()$#%]+>$\"]";
+    String failureReason = "[about must match \"(?U)^<#E::\\w+::[\\w'\\- .&/:+\"\\\\()$#%]+>$\"]";
     assertResponseContains(() -> createThread(create, USER_AUTH_HEADERS), BAD_REQUEST, failureReason);

     create.withAbout("<#E::>"); // Invalid EntityLink - missing entityType and entityId


@@ -351,7 +351,7 @@ public class GlossaryResourceTest extends EntityResourceTest<Glossary, CreateGlo
     Awaitility.await().atMost(4, TimeUnit.SECONDS).until(() -> true);
     assertSummary(result, ApiStatus.FAILURE, 2, 1, 1);
     String[] expectedRows = {
-      resultsHeader, getFailedRecord(record, "[name must match \"\"^(?U)[\\w'\\- .&()%]+$\"\"]")
+      resultsHeader, getFailedRecord(record, "[name must match \"\"(?U)^[\\w'\\- .&()%]+$\"\"]")
     };
     assertRows(result, expectedRows);


@@ -73,7 +73,7 @@ public class TypeResourceTest extends EntityResourceTest<Type, CreateType> {
     // Names can't start with capital letter, can't have space, hyphen, apostrophe
     String[] tests = {"a bc", "a-bc", "a'b"};

-    String error = "[name must match \"^(?U)[\\w]+$\"]";
+    String error = "[name must match \"(?U)^[\\w]+$\"]";
     CreateType create = createRequest("placeHolder", "", "", null);
     for (String test : tests) {
       LOG.info("Testing with the name {}", test);


@@ -993,7 +993,7 @@ public class UserResourceTest extends EntityResourceTest<User, CreateUser> {
     String csv = createCsv(UserCsv.HEADERS, listOf(record), null);
     CsvImportResult result = importCsv(team.getName(), csv, false);
     assertSummary(result, ApiStatus.FAILURE, 2, 1, 1);
-    String[] expectedRows = {resultsHeader, getFailedRecord(record, "[name must match \"\"^(?U)[\\w\\-.]+$\"\"]")};
+    String[] expectedRows = {resultsHeader, getFailedRecord(record, "[name must match \"\"(?U)^[\\w\\-.]+$\"\"]")};
     assertRows(result, expectedRows);

     // Invalid team


@@ -22,7 +22,7 @@ class ValidatorUtilTest {
     // Invalid name
     glossary.withName("invalid::Name").withDescription("description");
-    assertEquals("[name must match \"^(?U)[\\w'\\- .&()%]+$\"]", ValidatorUtil.validate(glossary));
+    assertEquals("[name must match \"(?U)^[\\w'\\- .&()%]+$\"]", ValidatorUtil.validate(glossary));

     // No error
     glossary.withName("validName").withId(UUID.randomUUID()).withDescription("description");


@@ -12,7 +12,7 @@
       "type": "string",
       "minLength": 2,
       "maxLength": 64,
-      "pattern": "^(?U)[\\w'\\- .&()]+$"
+      "pattern": "(?U)^[\\w'\\- .&()]+$"
     }
   },
   "properties": {


@@ -14,7 +14,7 @@
       "type": "string",
       "minLength": 1,
       "maxLength": 128,
-      "pattern": "^(?U)[\\w'\\-.]+$"
+      "pattern": "(?U)^[\\w'\\-.]+$"
     }
   },
   "properties": {


@@ -12,7 +12,7 @@
       "type": "string",
       "minLength": 1,
       "maxLength": 64,
-      "pattern": "^(?U)[\\w\\-.]+$"
+      "pattern": "(?U)^[\\w\\-.]+$"
     },
     "authenticationMechanism": {
       "type": "object",


@@ -10,7 +10,7 @@
     "entityName": {
      "description": "Name of the property or entity types. Note a property name must be unique for an entity. Property name must follow camelCase naming adopted by openMetadata - must start with lower case with no space, underscore, or dots.",
      "type": "string",
-     "pattern": "^(?U)[\\w]+$"
+     "pattern": "(?U)^[\\w]+$"
     },
     "category": {
      "description": "Metadata category to which a type belongs to.",


@@ -93,14 +93,14 @@
     "entityLink": {
      "description": "Link to an entity or field within an entity using this format `<#E::{entities}::{entityType}::{field}::{arrayFieldName}::{arrayFieldValue}`.",
      "type": "string",
-     "pattern": "^(?U)<#E::\\w+::[\\w'\\- .&/:+\"\\\\()$#%]+>$"
+     "pattern": "(?U)^<#E::\\w+::[\\w'\\- .&/:+\"\\\\()$#%]+>$"
     },
     "entityName": {
      "description": "Name that identifies an entity.",
      "type": "string",
      "minLength": 1,
      "maxLength": 128,
-     "pattern": "^(?U)[\\w'\\- .&()%]+$"
+     "pattern": "(?U)^[\\w'\\- .&()%]+$"
     },
     "fullyQualifiedEntityName": {
      "description": "A unique name that identifies an entity. Example for table 'DatabaseService.Database.Schema.Table'.",


@@ -47,6 +47,6 @@ for file_path in UNICODE_REGEX_REPLACEMENT_FILE_PATHS:
     with open(file_path, "r", encoding="UTF-8") as file_:
         content = file_.read()

     # Python now requires to move the global flags at the very start of the expression
-    content = content.replace("^(?U)", "(?u)^")
+    content = content.replace("(?U)", "(?u)")

     with open(file_path, "w", encoding="UTF-8") as file_:
         file_.write(content)
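
The reason for the sweep across the JSON schemas, the Java string literals, and the generated Python models: starting with Python 3.11, re rejects global inline flags that are not at position 0, which the old ^(?U) ordering violated. A quick demonstration:

import re

# On Python 3.11+ this raises re.error ("global flags not at the start
# of the expression"); older versions only emitted a DeprecationWarning.
try:
    re.compile("^(?u)[\\w]+$")
except re.error as exc:
    print(f"rejected: {exc}")

# With the flag moved to position 0, the pattern compiles everywhere.
assert re.match("(?u)^[\\w]+$", "valid_name")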