Support postgres multiple DB ingestion for user with no access (#10207)

This commit is contained in:
Mayur Singal 2023-02-23 15:42:40 +05:30 committed by GitHub
parent 9dbfea2fca
commit 0fafbf70ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 58 additions and 19 deletions

View File

@ -5,4 +5,23 @@ ADD UNIQUE (email);
-- Remove classificationName in BigQuery
UPDATE dbservice_entity
SET json = JSON_REMOVE(json, '$.connection.config.classificationName') where serviceType in ('BigQuery');
SET json = JSON_REMOVE(json, '$.connection.config.classificationName') where serviceType in ('BigQuery');
-- Migrate Postgres services to the ingestAllDatabases flag.
-- Only services stored WITHOUT connection.config.database are touched:
--   * connection.config.database is back-filled with the name of one database
--     entity linked to the service through entity_relationship (LIMIT 1 with
--     no ORDER BY, so which linked database is picked is unspecified);
--   * connection.config.ingestAllDatabases is set to true.
-- JSON_SET is required for the flag: JSON_REPLACE only updates paths that
-- already exist, and ingestAllDatabases is a new key, so JSON_REPLACE would
-- silently leave it unset.
-- NOTE(review): a service with no linked database entity yields a NULL
-- subquery result for the database value — confirm that outcome is acceptable.
UPDATE dbservice_entity de2
SET json = JSON_SET(
    JSON_INSERT(json,
      '$.connection.config.database',
      (select JSON_EXTRACT(de.json, '$.name')
        from database_entity de
        where de.id = (select er.toId
            from entity_relationship er
            where er.fromId = de2.id
              and er.toEntity = 'database'
            LIMIT 1
          ))
    ), '$.connection.config.ingestAllDatabases',
    true
  )
where de2.serviceType = 'Postgres'
  and JSON_EXTRACT(json, '$.connection.config.database') is NULL;

View File

@ -4,4 +4,24 @@ ADD UNIQUE (email);
-- Remove classificationName in BigQuery
UPDATE dbservice_entity SET json = json #- '{connection,config,classificationName}' where serviceType in ('BigQuery');
UPDATE dbservice_entity SET json = json #- '{connection,config,classificationName}' where serviceType in ('BigQuery');
-- Migrate Postgres services to the ingestAllDatabases flag.
-- Only services stored WITHOUT connection.config.database are touched:
--   * connection.config.database is back-filled with the name of one database
--     entity linked to the service through entity_relationship (LIMIT 1 with
--     no ORDER BY, so which linked database is picked is unspecified);
--   * connection.config.ingestAllDatabases is set to true.
-- The filter must use the path operator #>>. The key operator ->> would look
-- for a top-level key literally named '{connection,config,database}', which
-- never exists, so every Postgres service would match and an explicitly
-- configured database would be overwritten.
-- NOTE(review): a service with no linked database entity gets a JSON null
-- database value — confirm that outcome is acceptable.
UPDATE dbservice_entity de2
SET json = JSONB_SET(
    -- JSONB_SET already returns the whole document, so no extra "json ||" merge is needed.
    JSONB_SET(json, '{connection,config}', json#>'{connection,config}' ||
      jsonb_build_object('database',
        (SELECT de.json->>'name'
         FROM database_entity de
         WHERE de.id = (SELECT er.toId
                        FROM entity_relationship er
                        WHERE er.fromId = de2.id
                          AND er.toEntity = 'database'
                        LIMIT 1)
        )
      )),
    '{connection,config,ingestAllDatabases}',
    'true'::jsonb
  )
WHERE de2.serviceType = 'Postgres'
  AND json#>>'{connection,config,database}' IS NULL;

View File

@ -48,6 +48,7 @@ from metadata.ingestion.source.database.common_db_source import (
)
from metadata.ingestion.source.database.postgres.queries import (
POSTGRES_GET_ALL_TABLE_PG_POLICY,
POSTGRES_GET_DB_NAMES,
POSTGRES_GET_TABLE_NAMES,
POSTGRES_PARTITION_DETAILS,
POSTGRES_TABLE_COMMENTS,
@ -174,14 +175,12 @@ class PostgresSource(CommonDbSourceService):
]
def get_database_names(self) -> Iterable[str]:
configured_db = self.config.serviceConnection.__root__.config.database
if configured_db:
if not self.config.serviceConnection.__root__.config.ingestAllDatabases:
configured_db = self.config.serviceConnection.__root__.config.database
self.set_inspector(database_name=configured_db)
yield configured_db
else:
results = self.connection.execute(
"select datname from pg_catalog.pg_database"
)
results = self.connection.execute(POSTGRES_GET_DB_NAMES)
for res in results:
row = list(res)
new_database = row[0]

View File

@ -132,3 +132,8 @@ POSTGRES_SQL_STATEMENT_TEST = """
JOIN pg_catalog.pg_database d ON s.dbid = d.oid
JOIN pg_catalog.pg_user u ON s.userid = u.usesysid
"""
# Query returning the name (datname) of every row in pg_catalog.pg_database;
# no filtering is applied in the query itself.
POSTGRES_GET_DB_NAMES = """
select datname from pg_catalog.pg_database
"""

View File

@ -557,16 +557,6 @@ class SourceConnectionTest(TestCase):
assert expected_url == get_connection_url_common(mariadb_conn_obj)
def test_postgres_url(self):
# connection arguments without db
expected_url = "postgresql+psycopg2://openmetadata_user:@localhost:5432"
postgres_conn_obj = PostgresConnection(
username="openmetadata_user",
hostPort="localhost:5432",
scheme=PostgresScheme.postgresql_psycopg2,
database=None,
)
assert expected_url == get_connection_url_common(postgres_conn_obj)
# connection arguments with db
expected_url = "postgresql+psycopg2://openmetadata_user:@localhost:5432/default"
postgres_conn_obj = PostgresConnection(
@ -745,7 +735,7 @@ class SourceConnectionTest(TestCase):
username="user",
password=None,
hostPort="localhost:443",
database=None,
database="postgres",
connectionArguments=None,
scheme=PostgresScheme.postgresql_psycopg2,
)

View File

@ -59,6 +59,12 @@
"type": "string",
"default": "PostgresPolicyTags"
},
"ingestAllDatabases": {
"title": "Ingest All Databases",
"description": "Ingest data from all databases in Postgres. You can use databaseFilterPattern on top of this.",
"type": "boolean",
"default": false
},
"connectionOptions": {
"title": "Connection Options",
"$ref": "../connectionBasicType.json#/definitions/connectionOptions"
@ -94,5 +100,5 @@
}
},
"additionalProperties": false,
"required": ["hostPort", "username"]
"required": ["hostPort", "username", "database"]
}