Fix #11868: Duplicated queries cannot be created (#15519)

* Fix #11868: Duplicate query should throw an error of entityExists

* Fix #11868: Duplicate query should throw an error of entityExists

* fix test

* fix test

* Fix uniquee constraint for checksum in Postgres

---------

Co-authored-by: Pere Miquel Brull <peremiquelbrull@gmail.com>
This commit is contained in:
Sriharsha Chintalapani 2024-03-13 05:02:26 -07:00 committed by GitHub
parent dbc1fc180c
commit d0efaac877
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 90 additions and 26 deletions

View File

@ -1,4 +1,8 @@
-- Add the supportsProfiler field to the MongoDB connection configuration
UPDATE dbservice_entity
SET json = JSON_INSERT(json, '$.connection.config.supportsProfiler', TRUE)
WHERE serviceType = 'MongoDB';
WHERE serviceType = 'MongoDB';
ALTER TABLE query_entity ADD COLUMN checksum VARCHAR(32) GENERATED ALWAYS AS (json ->> '$.checksum') NOT NULL UNIQUE;
UPDATE query_entity SET json = JSON_INSERT(json, '$.checksum', MD5(JSON_UNQUOTE(JSON_EXTRACT(json, '$.checksum'))));

View File

@ -1,4 +1,9 @@
-- Add the supportsProfiler field to the MongoDB connection configuration
UPDATE dbservice_entity
SET json = jsonb_set(json::jsonb, '{connection,config,supportsProfiler}', 'true'::jsonb)
WHERE serviceType = 'MongoDB';
WHERE serviceType = 'MongoDB';
ALTER TABLE query_entity ADD COLUMN checksum varchar(32) GENERATED ALWAYS AS (json ->> 'checksum') STORED NOT NULL,
ADD UNIQUE(checksum);
UPDATE query_entity SET json = jsonb_set(json::jsonb, '{checksum}', MD5(json->'connection'));

View File

@ -13,6 +13,7 @@ OMeta ES Mixin integration tests. The API needs to be up
"""
import logging
import time
import uuid
from unittest import TestCase
from requests.utils import quote
@ -49,8 +50,6 @@ from metadata.generated.schema.type.basic import SqlQuery
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.utils import fqn
QUERY_CHECKSUM = fqn.get_query_checksum("select * from awesome")
class OMetaESTest(TestCase):
"""
@ -118,7 +117,7 @@ class OMetaESTest(TestCase):
metadata=None,
entity_type=Query,
service_name="test-service-es",
query_checksum=QUERY_CHECKSUM,
query_checksum=cls.checksum,
),
)
if not table_res or query_res:
@ -155,16 +154,18 @@ class OMetaESTest(TestCase):
cls.entity = cls.metadata.create_or_update(create)
query_str = str(uuid.uuid4())
cls.checksum = fqn.get_query_checksum(query_str)
# Create queries for the given service
query = CreateQueryRequest(
query=SqlQuery(__root__="select * from awesome"),
query=SqlQuery(__root__=query_str),
service=cls.service_entity.fullyQualifiedName,
processedLineage=True, # Only 1 with processed lineage
)
cls.metadata.create_or_update(query)
query2 = CreateQueryRequest(
query=SqlQuery(__root__="select * from another_awesome"),
query=SqlQuery(__root__=str(uuid.uuid4())),
service=cls.service_entity.fullyQualifiedName,
)
cls.metadata.create_or_update(query2)
@ -175,7 +176,7 @@ class OMetaESTest(TestCase):
)
another_query = CreateQueryRequest(
query=SqlQuery(__root__="select * from awesome"),
query=SqlQuery(__root__=str(uuid.uuid4())),
service=cls.another_service_entity.fullyQualifiedName,
processedLineage=True,
)
@ -290,4 +291,4 @@ class OMetaESTest(TestCase):
def test_get_queries_with_lineage(self):
"""Check the payload from ES"""
res = self.metadata.es_get_queries_with_lineage(self.service.name.__root__)
self.assertIn(QUERY_CHECKSUM, res)
self.assertIn(self.checksum, res)

View File

@ -492,7 +492,7 @@ class OMetaTableTest(TestCase):
)
query_no_user = CreateQueryRequest(
query=SqlQuery(__root__="select * from awesome"),
query=SqlQuery(__root__="select * from first_awesome"),
service=FullyQualifiedEntityName(__root__=self.service.name.__root__),
)
@ -507,7 +507,7 @@ class OMetaTableTest(TestCase):
# Validate that we can properly add user information
query_with_user = CreateQueryRequest(
query="select * from awesome",
query="select * from second_awesome",
users=[self.owner.fullyQualifiedName],
service=FullyQualifiedEntityName(__root__=self.service.name.__root__),
)
@ -517,10 +517,17 @@ class OMetaTableTest(TestCase):
res.id, fields=["*"]
)
assert len(table_with_query) == 1
assert table_with_query[0].query == query_with_user.query
assert len(table_with_query[0].users) == 1
assert table_with_query[0].users[0].id == self.owner.id
assert len(table_with_query) == 2
query_with_owner = next(
(
query
for query in table_with_query
if query.query == query_with_user.query
),
None,
)
assert len(query_with_owner.users) == 1
assert query_with_owner.users[0].id == self.owner.id
def test_list_versions(self):
"""

View File

@ -275,8 +275,9 @@ public class QueryRepository extends EntityRepository<Query> {
String originalChecksum = EntityUtil.hash(original.getQuery());
String updatedChecksum = EntityUtil.hash(updated.getQuery());
if (!originalChecksum.equals(updatedChecksum)) {
updated.setChecksum(updatedChecksum);
recordChange("query", original.getQuery(), updated.getQuery());
recordChange("checkSum", original.getChecksum(), updatedChecksum);
recordChange("checksum", original.getChecksum(), updated.getChecksum());
}
}
}

View File

@ -608,6 +608,7 @@ public class QueryResource extends EntityResource<Query, QueryRepository> {
return repository
.copy(new Query(), create, user)
.withQuery(create.getQuery())
.withChecksum(EntityUtil.hash(create.getQuery()))
.withService(getEntityReference(Entity.DATABASE_SERVICE, create.getService()))
.withDuration(create.getDuration())
.withVotes(new Votes().withUpVotes(0).withDownVotes(0))

View File

@ -1299,7 +1299,7 @@ public abstract class EntityResourceTest<T extends EntityInterface, K extends Cr
T entity = createEntity(request, ADMIN_AUTH_HEADERS);
// Update null description with a new description
request = createRequest(entity.getName(), "updatedDescription", "displayName", null);
request = request.withDescription("updatedDescription");
ChangeDescription change = getChangeDescription(entity, MINOR_UPDATE);
fieldAdded(change, "description", "updatedDescription");
updateAndCheckEntity(request, OK, ADMIN_AUTH_HEADERS, MINOR_UPDATE, change);

View File

@ -20,10 +20,14 @@ import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Response;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.http.client.HttpResponseException;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.MethodOrderer;
import org.junit.jupiter.api.Order;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.junit.jupiter.api.TestMethodOrder;
import org.openmetadata.schema.api.VoteRequest;
import org.openmetadata.schema.api.data.CreateQuery;
import org.openmetadata.schema.api.data.CreateTable;
@ -39,6 +43,7 @@ import org.openmetadata.service.util.ResultList;
import org.openmetadata.service.util.TestUtils;
@Slf4j
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class QueryResourceTest extends EntityResourceTest<Query, CreateQuery> {
private EntityReference TABLE_REF;
private String QUERY;
@ -65,7 +70,7 @@ public class QueryResourceTest extends EntityResourceTest<Query, CreateQuery> {
.withOwner(EntityResourceTest.USER1_REF);
Table createdTable = tableResourceTest.createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
TABLE_REF = createdTable.getEntityReference();
QUERY = "select * from sales";
QUERY = "select * from %s";
QUERY_CHECKSUM = EntityUtil.hash(QUERY);
}
@ -76,7 +81,7 @@ public class QueryResourceTest extends EntityResourceTest<Query, CreateQuery> {
.withOwner(USER1_REF)
.withUsers(List.of(USER2.getName()))
.withQueryUsedIn(List.of(TABLE_REF))
.withQuery(QUERY)
.withQuery(String.format(QUERY, RandomStringUtils.random(10, true, false)))
.withDuration(0.0)
.withQueryDate(1673857635064L)
.withService(SNOWFLAKE_REFERENCE.getFullyQualifiedName());
@ -205,9 +210,10 @@ public class QueryResourceTest extends EntityResourceTest<Query, CreateQuery> {
// Note: in case of Query empty name works fine since we internally use Checksum
// Create an entity with mandatory name field null
final CreateQuery request =
createRequest(null, "description", "displayName", null).withQuery(QUERY);
createRequest(null, "description", "displayName", null)
.withQuery(String.format(QUERY, RandomStringUtils.random(10, true, false)));
Query entity = createEntity(request, ADMIN_AUTH_HEADERS);
assertEquals(QUERY_CHECKSUM, entity.getName());
assertEquals(EntityUtil.hash(request.getQuery()), entity.getChecksum());
// Create an entity with mandatory name field empty
final CreateQuery request1 = createRequest("TestQueryName", "description", "displayName", null);
@ -224,17 +230,17 @@ public class QueryResourceTest extends EntityResourceTest<Query, CreateQuery> {
}
@Test
@Order(1)
void test_sensitivePIIQuery() throws IOException {
CreateQuery create = createRequest("sensitiveQuery");
create.withTags(List.of(PII_SENSITIVE_TAG_LABEL));
createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
String createQuery = create.getQuery();
// Owner (USER1_REF) can see the results
ResultList<Query> queries = getQueries(100, "*", false, authHeaders(USER1_REF.getName()));
queries.getData().forEach(query -> assertEquals(query.getQuery(), QUERY));
ResultList<Query> queries = getQueries(1, "*", false, authHeaders(USER1_REF.getName()));
queries.getData().forEach(query -> assertEquals(query.getQuery(), createQuery));
// Another user won't see the PII query body
ResultList<Query> maskedQueries = getQueries(100, "*", false, authHeaders(USER2_REF.getName()));
ResultList<Query> maskedQueries = getQueries(1, "*", false, authHeaders(USER2_REF.getName()));
maskedQueries
.getData()
.forEach(
@ -249,6 +255,45 @@ public class QueryResourceTest extends EntityResourceTest<Query, CreateQuery> {
});
}
@Test
void test_duplicateQueryFail() throws IOException {
String query = "select * from test";
CreateQuery create = createRequest("duplicateQuery");
create.setQuery(query);
Query createdQuery = createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
CreateQuery create1 = createRequest("query2");
create.setQuery("select * from dim_address");
Query createdQuery2 = createAndCheckEntity(create1, ADMIN_AUTH_HEADERS);
CreateQuery postDuplicateCreate = createRequest("duplicateQuery1");
postDuplicateCreate.setQuery(query);
String origJson = JsonUtils.pojoToJson(query);
Query updatedQuery = getEntity(createdQuery.getId(), ADMIN_AUTH_HEADERS);
updatedQuery.setQuery("select * from dim_address");
assertResponse(
() -> createEntity(postDuplicateCreate, ADMIN_AUTH_HEADERS),
Response.Status.CONFLICT,
"Entity already exists");
}
@Test
void test_patchQueryMustUpdateChecksum(TestInfo test) throws IOException {
CreateQuery create = createRequest(getEntityName(test));
Query query = createAndCheckEntity(create, ADMIN_AUTH_HEADERS);
// Add queryUsedIn as TEST_TABLE2
String origJson = JsonUtils.pojoToJson(query);
String queryText = String.format(QUERY, "test2");
query.setQuery(queryText);
ChangeDescription change = getChangeDescription(query, MINOR_UPDATE);
fieldUpdated(change, "query", create.getQuery(), queryText);
fieldUpdated(
change, "checksum", EntityUtil.hash(create.getQuery()), EntityUtil.hash(queryText));
patchEntityAndCheck(query, origJson, ADMIN_AUTH_HEADERS, MINOR_UPDATE, change);
Query updatedQuery = getEntity(query.getId(), ADMIN_AUTH_HEADERS);
assertEquals(updatedQuery.getQuery(), queryText);
assertEquals(updatedQuery.getChecksum(), EntityUtil.hash(updatedQuery.getQuery()));
}
public ResultList<Query> getQueries(
Integer limit, String fields, Boolean includeAll, Map<String, String> authHeaders)
throws HttpResponseException {