diff --git a/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql index 06035180588..d47997009b3 100644 --- a/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql +++ b/bootstrap/sql/migrations/native/1.4.0/mysql/schemaChanges.sql @@ -1,4 +1,8 @@ -- Add the supportsProfiler field to the MongoDB connection configuration UPDATE dbservice_entity SET json = JSON_INSERT(json, '$.connection.config.supportsProfiler', TRUE) -WHERE serviceType = 'MongoDB'; \ No newline at end of file +WHERE serviceType = 'MongoDB'; + +ALTER TABLE query_entity ADD COLUMN checksum VARCHAR(32) GENERATED ALWAYS AS (json ->> '$.checksum') NOT NULL UNIQUE; + +UPDATE query_entity SET json = JSON_INSERT(json, '$.checksum', MD5(JSON_UNQUOTE(JSON_EXTRACT(json, '$.checksum')))); \ No newline at end of file diff --git a/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql index c8b6830c129..bcab92ec586 100644 --- a/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql +++ b/bootstrap/sql/migrations/native/1.4.0/postgres/schemaChanges.sql @@ -1,4 +1,9 @@ -- Add the supportsProfiler field to the MongoDB connection configuration UPDATE dbservice_entity SET json = jsonb_set(json::jsonb, '{connection,config,supportsProfiler}', 'true'::jsonb) -WHERE serviceType = 'MongoDB'; \ No newline at end of file +WHERE serviceType = 'MongoDB'; + +ALTER TABLE query_entity ADD COLUMN checksum varchar(32) GENERATED ALWAYS AS (json ->> 'checksum') STORED NOT NULL, + ADD UNIQUE(checksum); + +UPDATE query_entity SET json = jsonb_set(json::jsonb, '{checksum}', MD5(json->'connection')); \ No newline at end of file diff --git a/ingestion/tests/integration/ometa/test_ometa_es_api.py b/ingestion/tests/integration/ometa/test_ometa_es_api.py index e7a2ef7ca03..91a86e4388b 100644 --- a/ingestion/tests/integration/ometa/test_ometa_es_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_es_api.py @@ -13,6 +13,7 @@ OMeta ES Mixin integration tests. The API needs to be up """ import logging import time +import uuid from unittest import TestCase from requests.utils import quote @@ -49,8 +50,6 @@ from metadata.generated.schema.type.basic import SqlQuery from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.utils import fqn -QUERY_CHECKSUM = fqn.get_query_checksum("select * from awesome") - class OMetaESTest(TestCase): """ @@ -118,7 +117,7 @@ class OMetaESTest(TestCase): metadata=None, entity_type=Query, service_name="test-service-es", - query_checksum=QUERY_CHECKSUM, + query_checksum=cls.checksum, ), ) if not table_res or query_res: @@ -155,16 +154,18 @@ class OMetaESTest(TestCase): cls.entity = cls.metadata.create_or_update(create) + query_str = str(uuid.uuid4()) + cls.checksum = fqn.get_query_checksum(query_str) # Create queries for the given service query = CreateQueryRequest( - query=SqlQuery(__root__="select * from awesome"), + query=SqlQuery(__root__=query_str), service=cls.service_entity.fullyQualifiedName, processedLineage=True, # Only 1 with processed lineage ) cls.metadata.create_or_update(query) query2 = CreateQueryRequest( - query=SqlQuery(__root__="select * from another_awesome"), + query=SqlQuery(__root__=str(uuid.uuid4())), service=cls.service_entity.fullyQualifiedName, ) cls.metadata.create_or_update(query2) @@ -175,7 +176,7 @@ class OMetaESTest(TestCase): ) another_query = CreateQueryRequest( - query=SqlQuery(__root__="select * from awesome"), + query=SqlQuery(__root__=str(uuid.uuid4())), service=cls.another_service_entity.fullyQualifiedName, processedLineage=True, ) @@ -290,4 +291,4 @@ class OMetaESTest(TestCase): def test_get_queries_with_lineage(self): """Check the payload from ES""" res = self.metadata.es_get_queries_with_lineage(self.service.name.__root__) - self.assertIn(QUERY_CHECKSUM, res) + self.assertIn(self.checksum, res) diff --git a/ingestion/tests/integration/ometa/test_ometa_table_api.py b/ingestion/tests/integration/ometa/test_ometa_table_api.py index 802d2336d38..7371d362cb5 100644 --- a/ingestion/tests/integration/ometa/test_ometa_table_api.py +++ b/ingestion/tests/integration/ometa/test_ometa_table_api.py @@ -492,7 +492,7 @@ class OMetaTableTest(TestCase): ) query_no_user = CreateQueryRequest( - query=SqlQuery(__root__="select * from awesome"), + query=SqlQuery(__root__="select * from first_awesome"), service=FullyQualifiedEntityName(__root__=self.service.name.__root__), ) @@ -507,7 +507,7 @@ class OMetaTableTest(TestCase): # Validate that we can properly add user information query_with_user = CreateQueryRequest( - query="select * from awesome", + query="select * from second_awesome", users=[self.owner.fullyQualifiedName], service=FullyQualifiedEntityName(__root__=self.service.name.__root__), ) @@ -517,10 +517,17 @@ class OMetaTableTest(TestCase): res.id, fields=["*"] ) - assert len(table_with_query) == 1 - assert table_with_query[0].query == query_with_user.query - assert len(table_with_query[0].users) == 1 - assert table_with_query[0].users[0].id == self.owner.id + assert len(table_with_query) == 2 + query_with_owner = next( + ( + query + for query in table_with_query + if query.query == query_with_user.query + ), + None, + ) + assert len(query_with_owner.users) == 1 + assert query_with_owner.users[0].id == self.owner.id def test_list_versions(self): """ diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/QueryRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/QueryRepository.java index ae1ad72c1af..392da07d24f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/QueryRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/QueryRepository.java @@ -275,8 +275,9 @@ public class QueryRepository extends EntityRepository { String originalChecksum = EntityUtil.hash(original.getQuery()); String updatedChecksum = EntityUtil.hash(updated.getQuery()); if (!originalChecksum.equals(updatedChecksum)) { + updated.setChecksum(updatedChecksum); recordChange("query", original.getQuery(), updated.getQuery()); - recordChange("checkSum", original.getChecksum(), updatedChecksum); + recordChange("checksum", original.getChecksum(), updated.getChecksum()); } } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/query/QueryResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/query/QueryResource.java index 2bbb7d3cfb7..bce569ab68f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/query/QueryResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/query/QueryResource.java @@ -608,6 +608,7 @@ public class QueryResource extends EntityResource { return repository .copy(new Query(), create, user) .withQuery(create.getQuery()) + .withChecksum(EntityUtil.hash(create.getQuery())) .withService(getEntityReference(Entity.DATABASE_SERVICE, create.getService())) .withDuration(create.getDuration()) .withVotes(new Votes().withUpVotes(0).withDownVotes(0)) diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java index a89d160333d..80edd6c1384 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java @@ -1299,7 +1299,7 @@ public abstract class EntityResourceTest { private EntityReference TABLE_REF; private String QUERY; @@ -65,7 +70,7 @@ public class QueryResourceTest extends EntityResourceTest { .withOwner(EntityResourceTest.USER1_REF); Table createdTable = tableResourceTest.createAndCheckEntity(create, ADMIN_AUTH_HEADERS); TABLE_REF = createdTable.getEntityReference(); - QUERY = "select * from sales"; + QUERY = "select * from %s"; QUERY_CHECKSUM = EntityUtil.hash(QUERY); } @@ -76,7 +81,7 @@ public class QueryResourceTest extends EntityResourceTest { .withOwner(USER1_REF) .withUsers(List.of(USER2.getName())) .withQueryUsedIn(List.of(TABLE_REF)) - .withQuery(QUERY) + .withQuery(String.format(QUERY, RandomStringUtils.random(10, true, false))) .withDuration(0.0) .withQueryDate(1673857635064L) .withService(SNOWFLAKE_REFERENCE.getFullyQualifiedName()); @@ -205,9 +210,10 @@ public class QueryResourceTest extends EntityResourceTest { // Note: in case of Query empty name works fine since we internally use Checksum // Create an entity with mandatory name field null final CreateQuery request = - createRequest(null, "description", "displayName", null).withQuery(QUERY); + createRequest(null, "description", "displayName", null) + .withQuery(String.format(QUERY, RandomStringUtils.random(10, true, false))); Query entity = createEntity(request, ADMIN_AUTH_HEADERS); - assertEquals(QUERY_CHECKSUM, entity.getName()); + assertEquals(EntityUtil.hash(request.getQuery()), entity.getChecksum()); // Create an entity with mandatory name field empty final CreateQuery request1 = createRequest("TestQueryName", "description", "displayName", null); @@ -224,17 +230,17 @@ public class QueryResourceTest extends EntityResourceTest { } @Test + @Order(1) void test_sensitivePIIQuery() throws IOException { CreateQuery create = createRequest("sensitiveQuery"); create.withTags(List.of(PII_SENSITIVE_TAG_LABEL)); createAndCheckEntity(create, ADMIN_AUTH_HEADERS); - + String createQuery = create.getQuery(); // Owner (USER1_REF) can see the results - ResultList queries = getQueries(100, "*", false, authHeaders(USER1_REF.getName())); - queries.getData().forEach(query -> assertEquals(query.getQuery(), QUERY)); - + ResultList queries = getQueries(1, "*", false, authHeaders(USER1_REF.getName())); + queries.getData().forEach(query -> assertEquals(query.getQuery(), createQuery)); // Another user won't see the PII query body - ResultList maskedQueries = getQueries(100, "*", false, authHeaders(USER2_REF.getName())); + ResultList maskedQueries = getQueries(1, "*", false, authHeaders(USER2_REF.getName())); maskedQueries .getData() .forEach( @@ -249,6 +255,45 @@ public class QueryResourceTest extends EntityResourceTest { }); } + @Test + void test_duplicateQueryFail() throws IOException { + String query = "select * from test"; + CreateQuery create = createRequest("duplicateQuery"); + create.setQuery(query); + Query createdQuery = createAndCheckEntity(create, ADMIN_AUTH_HEADERS); + CreateQuery create1 = createRequest("query2"); + create.setQuery("select * from dim_address"); + Query createdQuery2 = createAndCheckEntity(create1, ADMIN_AUTH_HEADERS); + CreateQuery postDuplicateCreate = createRequest("duplicateQuery1"); + postDuplicateCreate.setQuery(query); + String origJson = JsonUtils.pojoToJson(query); + Query updatedQuery = getEntity(createdQuery.getId(), ADMIN_AUTH_HEADERS); + updatedQuery.setQuery("select * from dim_address"); + assertResponse( + () -> createEntity(postDuplicateCreate, ADMIN_AUTH_HEADERS), + Response.Status.CONFLICT, + "Entity already exists"); + } + + @Test + void test_patchQueryMustUpdateChecksum(TestInfo test) throws IOException { + CreateQuery create = createRequest(getEntityName(test)); + Query query = createAndCheckEntity(create, ADMIN_AUTH_HEADERS); + + // Add queryUsedIn as TEST_TABLE2 + String origJson = JsonUtils.pojoToJson(query); + String queryText = String.format(QUERY, "test2"); + query.setQuery(queryText); + ChangeDescription change = getChangeDescription(query, MINOR_UPDATE); + fieldUpdated(change, "query", create.getQuery(), queryText); + fieldUpdated( + change, "checksum", EntityUtil.hash(create.getQuery()), EntityUtil.hash(queryText)); + patchEntityAndCheck(query, origJson, ADMIN_AUTH_HEADERS, MINOR_UPDATE, change); + Query updatedQuery = getEntity(query.getId(), ADMIN_AUTH_HEADERS); + assertEquals(updatedQuery.getQuery(), queryText); + assertEquals(updatedQuery.getChecksum(), EntityUtil.hash(updatedQuery.getQuery())); + } + public ResultList getQueries( Integer limit, String fields, Boolean includeAll, Map authHeaders) throws HttpResponseException {