From d12497a3ffc11b85d070b3ed4563d179bb31f41d Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 15 Feb 2021 12:17:23 -0800 Subject: [PATCH] Add postgres source --- metadata-ingestion/README.md | 24 +++++++++++++++++++ metadata-ingestion/setup.py | 3 --- .../src/gometa/ingestion/source/__init__.py | 2 ++ .../src/gometa/ingestion/source/postgres.py | 16 +++++++++++++ metadata-ingestion/test_requirements.txt | 4 ++++ 5 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 metadata-ingestion/src/gometa/ingestion/source/postgres.py diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index e7e588a931..989031dcd8 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -98,6 +98,8 @@ Extracts: - List of databases and tables - Column types and schema associated with each table +Extra requirements: `pip install pymysql` + ```yml source: type: mysql @@ -118,6 +120,8 @@ Extracts: - List of databases, schema, and tables - Column types associated with each table +Extra requirements: `pip install sqlalchemy-pytds` + ```yml source: type: mssql @@ -138,6 +142,8 @@ Extracts: - List of databases, schema, and tables - Column types associated with each table +Extra requirements: `pip install pyhive[hive]` + ```yml source: type: hive @@ -149,6 +155,24 @@ source: # table_pattern is same as above ``` +## PostgreSQL `postgres` +Extracts: +- List of databases, schema, and tables +- Column types associated with each table + +Extra requirements: `pip install psycopg2-binary` + +```yml +source: + type: postgres + config: + username: user + password: pass + host_port: localhost:5432 + database: DemoDatabase + # table_pattern is same as above +``` + ## File `file` Pulls metadata from a previously generated file. Note that the file sink can produce such files, and a number of samples are included in the diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 048ec9b369..b93f81e2c9 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -79,9 +79,6 @@ setuptools.setup( # fastavro for serialization. "fastavro>=1.3.0", "avro-python3>=1.8.2", - # Required for certain sources/sinks. "sqlalchemy>=1.3.23", # Required for SQL sources - "pymysql>=1.0.2", # Driver for MySQL - "sqlalchemy-pytds>=0.3", # Driver for MS-SQL ], ) diff --git a/metadata-ingestion/src/gometa/ingestion/source/__init__.py b/metadata-ingestion/src/gometa/ingestion/source/__init__.py index d4c49103da..22e5e24d53 100644 --- a/metadata-ingestion/src/gometa/ingestion/source/__init__.py +++ b/metadata-ingestion/src/gometa/ingestion/source/__init__.py @@ -9,11 +9,13 @@ from .mce_file import MetadataFileSource from .mssql import SQLServerSource from .mysql import MySQLSource from .hive import HiveSource +from .postgres import PostgresSource source_class_mapping: Dict[str, Type[Source]] = { "mssql": SQLServerSource, "mysql": MySQLSource, "hive": HiveSource, + "postgres": PostgresSource, "kafka": KafkaSource, # "ldap": LDAPSource, "file": MetadataFileSource, diff --git a/metadata-ingestion/src/gometa/ingestion/source/postgres.py b/metadata-ingestion/src/gometa/ingestion/source/postgres.py new file mode 100644 index 0000000000..6e838762bf --- /dev/null +++ b/metadata-ingestion/src/gometa/ingestion/source/postgres.py @@ -0,0 +1,16 @@ +from .sql_common import SQLAlchemyConfig, SQLAlchemySource + + +class PostgresConfig(SQLAlchemyConfig): + # defaults + scheme = "postgresql+psycopg2" + + +class PostgresSource(SQLAlchemySource): + def __init__(self, config, ctx): + super().__init__(config, ctx, "postgresql") + + @classmethod + def create(cls, config_dict, ctx): + config = PostgresConfig.parse_obj(config_dict) + return cls(config, ctx) diff --git a/metadata-ingestion/test_requirements.txt b/metadata-ingestion/test_requirements.txt index 219713ca9a..dbc1f34743 100644 --- a/metadata-ingestion/test_requirements.txt +++ b/metadata-ingestion/test_requirements.txt @@ -8,3 +8,7 @@ pytest-cov>=2.8.1 pytest-docker sqlalchemy-stubs deepdiff + +# These are used in integration tests. +pymysql>=1.0.2 # Driver for MySQL +sqlalchemy-pytds>=0.3 # Driver for MS-SQL