From 7ca018aaa455ff7a09a91d54ffbfef6460040549 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 11 Feb 2021 21:59:54 -0800 Subject: [PATCH] Remove unused imports --- .../.github/workflows/python-package.yml | 8 ++++---- metadata-ingestion/setup.cfg | 10 +++++++++- metadata-ingestion/src/gometa/configuration/common.py | 3 +-- metadata-ingestion/src/gometa/configuration/kafka.py | 3 +-- metadata-ingestion/src/gometa/configuration/yaml.py | 2 +- metadata-ingestion/src/gometa/entrypoints.py | 2 +- .../src/gometa/ingestion/api/closeable.py | 2 -- metadata-ingestion/src/gometa/ingestion/api/common.py | 2 +- metadata-ingestion/src/gometa/ingestion/api/report.py | 1 - .../src/gometa/ingestion/extractor/generic.py | 2 +- .../src/gometa/ingestion/extractor/schema_util.py | 5 +---- .../src/gometa/ingestion/run/pipeline.py | 9 ++------- .../src/gometa/ingestion/sink/datahub_kafka.py | 6 ++---- .../src/gometa/ingestion/sink/datahub_rest.py | 10 +++------- metadata-ingestion/src/gometa/ingestion/sink/file.py | 3 +-- .../src/gometa/ingestion/source/kafka.py | 6 ++---- .../src/gometa/ingestion/source/mce_file.py | 2 +- .../src/gometa/ingestion/source/sql_common.py | 10 +--------- .../tests/integration/mysql/test_mysql.py | 2 -- .../tests/integration/sql_server/test_sql_server.py | 4 ---- metadata-ingestion/tests/unit/serde/test_serde.py | 9 ++------- metadata-ingestion/tests/unit/test_allow_deny.py | 11 ++++++++--- metadata-ingestion/tests/unit/test_kafka_sink.py | 2 +- metadata-ingestion/tests/unit/test_pipeline.py | 2 +- 24 files changed, 44 insertions(+), 72 deletions(-) diff --git a/metadata-ingestion/.github/workflows/python-package.yml b/metadata-ingestion/.github/workflows/python-package.yml index 52b5e95eb7..c446fb9ec8 100644 --- a/metadata-ingestion/.github/workflows/python-package.yml +++ b/metadata-ingestion/.github/workflows/python-package.yml @@ -21,12 +21,12 @@ jobs: python -m pip install --upgrade pip pip install -e . pip install -r test_requirements.txt + - name: Check formatting with black + run: | + black --exclude 'gometa/metadata' -S -t py36 -l 120 src tests - name: Lint with flake8 run: | - # stop the build if there are Python syntax errors or undefined names - #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 src tests --count --statistics || true - name: Check with mypy run: | mypy -p gometa || true diff --git a/metadata-ingestion/setup.cfg b/metadata-ingestion/setup.cfg index dcc5e0e013..6581183dc2 100644 --- a/metadata-ingestion/setup.cfg +++ b/metadata-ingestion/setup.cfg @@ -1,6 +1,14 @@ [flake8] -max-line-length = 130 +max-line-length = 120 max-complexity = 15 +ignore = D203 +exclude = + .git, + src/gometa/metadata, + __pycache__ +per-file-ignores = + # imported but unused + __init__.py: F401 [mypy] mypy_path = src diff --git a/metadata-ingestion/src/gometa/configuration/common.py b/metadata-ingestion/src/gometa/configuration/common.py index 4652098597..733a3b698f 100644 --- a/metadata-ingestion/src/gometa/configuration/common.py +++ b/metadata-ingestion/src/gometa/configuration/common.py @@ -1,7 +1,6 @@ from abc import ABC, abstractmethod -from typing import TypeVar, Type, List, IO +from typing import List, IO from pydantic import BaseModel, ValidationError -from pathlib import Path from contextlib import contextmanager import re diff --git a/metadata-ingestion/src/gometa/configuration/kafka.py b/metadata-ingestion/src/gometa/configuration/kafka.py index 27de9c045b..e6c2cf0d44 100644 --- a/metadata-ingestion/src/gometa/configuration/kafka.py +++ b/metadata-ingestion/src/gometa/configuration/kafka.py @@ -1,5 +1,4 @@ -from typing import Optional -from pydantic import BaseModel, Field, ValidationError, validator +from pydantic import BaseModel, validator class _KafkaConnectionConfig(BaseModel): diff --git a/metadata-ingestion/src/gometa/configuration/yaml.py b/metadata-ingestion/src/gometa/configuration/yaml.py index 9c8760cb45..a490e0741b 100644 --- a/metadata-ingestion/src/gometa/configuration/yaml.py +++ b/metadata-ingestion/src/gometa/configuration/yaml.py @@ -1,7 +1,7 @@ from typing import IO import yaml -from .common import ConfigModel, ConfigurationMechanism +from gometa.configuration import ConfigurationMechanism class YamlConfigurationMechanism(ConfigurationMechanism): diff --git a/metadata-ingestion/src/gometa/entrypoints.py b/metadata-ingestion/src/gometa/entrypoints.py index 83117fc5cb..5a9d7690e2 100644 --- a/metadata-ingestion/src/gometa/entrypoints.py +++ b/metadata-ingestion/src/gometa/entrypoints.py @@ -6,7 +6,7 @@ import click from gometa.configuration.common import ConfigurationMechanism, ConfigurationError, nicely_formatted_validation_errors from gometa.configuration.yaml import YamlConfigurationMechanism from gometa.configuration.toml import TomlConfigurationMechanism -from gometa.ingestion.run.pipeline import Pipeline, PipelineConfig +from gometa.ingestion.run.pipeline import Pipeline logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/gometa/ingestion/api/closeable.py b/metadata-ingestion/src/gometa/ingestion/api/closeable.py index 5cf1cad54f..2c5f1076a8 100644 --- a/metadata-ingestion/src/gometa/ingestion/api/closeable.py +++ b/metadata-ingestion/src/gometa/ingestion/api/closeable.py @@ -1,5 +1,3 @@ -from abc import abstractmethod, ABCMeta - class Closeable: def close(self): diff --git a/metadata-ingestion/src/gometa/ingestion/api/common.py b/metadata-ingestion/src/gometa/ingestion/api/common.py index 27d8fc82ed..aca5651087 100644 --- a/metadata-ingestion/src/gometa/ingestion/api/common.py +++ b/metadata-ingestion/src/gometa/ingestion/api/common.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import TypeVar, Generic, Optional +from typing import TypeVar, Generic from abc import abstractmethod, ABCMeta T = TypeVar('T') diff --git a/metadata-ingestion/src/gometa/ingestion/api/report.py b/metadata-ingestion/src/gometa/ingestion/api/report.py index ca4ea78fec..8e8b63b6bb 100644 --- a/metadata-ingestion/src/gometa/ingestion/api/report.py +++ b/metadata-ingestion/src/gometa/ingestion/api/report.py @@ -1,4 +1,3 @@ -from abc import abstractmethod, ABCMeta from dataclasses import dataclass import json import pprint diff --git a/metadata-ingestion/src/gometa/ingestion/extractor/generic.py b/metadata-ingestion/src/gometa/ingestion/extractor/generic.py index e7f7058ced..ce2b0c53bf 100644 --- a/metadata-ingestion/src/gometa/ingestion/extractor/generic.py +++ b/metadata-ingestion/src/gometa/ingestion/extractor/generic.py @@ -1,5 +1,5 @@ from typing import Iterable -from gometa.ingestion.api.source import Extractor, WorkUnit +from gometa.ingestion.api.source import Extractor from gometa.ingestion.api import RecordEnvelope from gometa.ingestion.api.common import PipelineContext from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent diff --git a/metadata-ingestion/src/gometa/ingestion/extractor/schema_util.py b/metadata-ingestion/src/gometa/ingestion/extractor/schema_util.py index 8da2c5795c..896ca05c53 100644 --- a/metadata-ingestion/src/gometa/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/gometa/ingestion/extractor/schema_util.py @@ -1,10 +1,8 @@ import logging -from typing import List, Dict, Any +from typing import List, Any import avro.schema from gometa.metadata.com.linkedin.pegasus2avro.schema import ( - SchemaMetadata, - KafkaSchema, SchemaField, SchemaFieldDataType, BooleanTypeClass, @@ -14,7 +12,6 @@ from gometa.metadata.com.linkedin.pegasus2avro.schema import ( NumberTypeClass, EnumTypeClass, NullTypeClass, - MapTypeClass, ArrayTypeClass, UnionTypeClass, RecordTypeClass, diff --git a/metadata-ingestion/src/gometa/ingestion/run/pipeline.py b/metadata-ingestion/src/gometa/ingestion/run/pipeline.py index c351cef644..dfc92c406d 100644 --- a/metadata-ingestion/src/gometa/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/gometa/ingestion/run/pipeline.py @@ -1,14 +1,9 @@ -from typing import Dict -from pydantic import BaseModel -from dataclasses import dataclass, field -import pprint from gometa.configuration.common import DynamicTypedConfig, ConfigModel from gometa.ingestion.api.source import Source, Extractor from gometa.ingestion.source import source_class_mapping from gometa.ingestion.api.common import PipelineContext -from gometa.ingestion.api.sink import Sink, NoopWriteCallback, WriteCallback +from gometa.ingestion.api.sink import Sink, WriteCallback from gometa.ingestion.sink import sink_class_mapping -from typing import Optional import importlib import time import logging @@ -77,7 +72,7 @@ class Pipeline: def run(self): callback = LoggingCallback() - extractor = self.extractor_class() + extractor: Extractor = self.extractor_class() for wu in self.source.get_workunits(): # TODO: change extractor interface extractor.configure({}, self.ctx) diff --git a/metadata-ingestion/src/gometa/ingestion/sink/datahub_kafka.py b/metadata-ingestion/src/gometa/ingestion/sink/datahub_kafka.py index 55fee7d02d..78d1e4af12 100644 --- a/metadata-ingestion/src/gometa/ingestion/sink/datahub_kafka.py +++ b/metadata-ingestion/src/gometa/ingestion/sink/datahub_kafka.py @@ -1,7 +1,5 @@ -from dataclasses import dataclass, field -import json -from typing import Optional, TypeVar, Type -from pydantic import BaseModel, Field, ValidationError, validator +from dataclasses import dataclass +from pydantic import BaseModel from gometa.ingestion.api.sink import Sink, WriteCallback, SinkReport from gometa.ingestion.api.common import RecordEnvelope, WorkUnit, PipelineContext from gometa.configuration.kafka import KafkaProducerConnectionConfig diff --git a/metadata-ingestion/src/gometa/ingestion/sink/datahub_rest.py b/metadata-ingestion/src/gometa/ingestion/sink/datahub_rest.py index bb7e597476..3306f61a3d 100644 --- a/metadata-ingestion/src/gometa/ingestion/sink/datahub_rest.py +++ b/metadata-ingestion/src/gometa/ingestion/sink/datahub_rest.py @@ -1,14 +1,10 @@ -from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Optional, TypeVar, Type, Dict -from pydantic import BaseModel, Field, ValidationError, validator -from enum import Enum -from pathlib import Path +from typing import Type, Dict +from pydantic import BaseModel import requests from requests.exceptions import HTTPError from gometa.ingestion.api.sink import Sink, WriteCallback, SinkReport from gometa.ingestion.api.common import RecordEnvelope, WorkUnit -import json from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from gometa.metadata import ( ChartSnapshotClass, @@ -18,7 +14,7 @@ from gometa.metadata import ( DatasetSnapshotClass, DataProcessSnapshotClass, MLModelSnapshotClass, - MLFeatureSnapshotClass, + # MLFeatureSnapshotClass, ) from collections import OrderedDict diff --git a/metadata-ingestion/src/gometa/ingestion/sink/file.py b/metadata-ingestion/src/gometa/ingestion/sink/file.py index 029d3964d3..48679854e5 100644 --- a/metadata-ingestion/src/gometa/ingestion/sink/file.py +++ b/metadata-ingestion/src/gometa/ingestion/sink/file.py @@ -1,7 +1,6 @@ from gometa.ingestion.api.sink import Sink, WriteCallback, SinkReport -from gometa.ingestion.api.common import RecordEnvelope, PipelineContext, WorkUnit +from gometa.ingestion.api.common import RecordEnvelope, PipelineContext from pydantic import BaseModel -import os import pathlib import logging import json diff --git a/metadata-ingestion/src/gometa/ingestion/source/kafka.py b/metadata-ingestion/src/gometa/ingestion/source/kafka.py index 660f4fd82e..0798633a28 100644 --- a/metadata-ingestion/src/gometa/ingestion/source/kafka.py +++ b/metadata-ingestion/src/gometa/ingestion/source/kafka.py @@ -1,14 +1,12 @@ import logging from gometa.configuration import ConfigModel from gometa.configuration.kafka import KafkaConsumerConnectionConfig -from gometa.ingestion.api.source import Source, Extractor, SourceReport -from gometa.ingestion.api.source import WorkUnit -from typing import Optional, Iterable, List, Dict, Any +from gometa.ingestion.api.source import Source, SourceReport +from typing import Iterable, List, Dict, Any from dataclasses import dataclass, field import confluent_kafka from confluent_kafka.schema_registry.schema_registry_client import SchemaRegistryClient import re -from gometa.ingestion.api.closeable import Closeable from gometa.ingestion.source.metadata_common import MetadataWorkUnit import time diff --git a/metadata-ingestion/src/gometa/ingestion/source/mce_file.py b/metadata-ingestion/src/gometa/ingestion/source/mce_file.py index f25ccfa2b6..0b6790c8da 100644 --- a/metadata-ingestion/src/gometa/ingestion/source/mce_file.py +++ b/metadata-ingestion/src/gometa/ingestion/source/mce_file.py @@ -1,7 +1,7 @@ import json from dataclasses import dataclass, field from pydantic import BaseModel -from typing import Optional, Iterable +from typing import Iterable from gometa.ingestion.api.source import Source, SourceReport from gometa.ingestion.source.metadata_common import MetadataWorkUnit from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent diff --git a/metadata-ingestion/src/gometa/ingestion/source/sql_common.py b/metadata-ingestion/src/gometa/ingestion/source/sql_common.py index 1f9fe66623..b45b217416 100644 --- a/metadata-ingestion/src/gometa/ingestion/source/sql_common.py +++ b/metadata-ingestion/src/gometa/ingestion/source/sql_common.py @@ -3,7 +3,7 @@ from sqlalchemy import types from sqlalchemy.engine import reflection from gometa.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from gometa.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot -from gometa.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata, MySqlDDL +from gometa.metadata.com.linkedin.pegasus2avro.schema import SchemaMetadata, MySqlDDL, SchemaField, SchemaFieldDataType from gometa.metadata.com.linkedin.pegasus2avro.common import AuditStamp from gometa.ingestion.api.source import WorkUnit, Source, SourceReport @@ -15,21 +15,13 @@ from typing import Optional, List, Any, Dict from dataclasses import dataclass, field from gometa.metadata.com.linkedin.pegasus2avro.schema import ( - SchemaMetadata, - KafkaSchema, - SchemaField, - SchemaFieldDataType, BooleanTypeClass, - FixedTypeClass, StringTypeClass, BytesTypeClass, NumberTypeClass, EnumTypeClass, NullTypeClass, - MapTypeClass, ArrayTypeClass, - UnionTypeClass, - RecordTypeClass, ) logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/tests/integration/mysql/test_mysql.py b/metadata-ingestion/tests/integration/mysql/test_mysql.py index 5763716cae..ee3a376bee 100644 --- a/metadata-ingestion/tests/integration/mysql/test_mysql.py +++ b/metadata-ingestion/tests/integration/mysql/test_mysql.py @@ -1,6 +1,4 @@ import os -import pytest -import subprocess import mce_helpers diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index fefe57e142..684ce3e841 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -1,11 +1,7 @@ import os -import pytest import subprocess -import time - - def test_ingest(sql_server, pytestconfig): docker = "docker" command = f"{docker} exec testsqlserver /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P 'test!Password' -d master -i /setup/setup.sql" diff --git a/metadata-ingestion/tests/unit/serde/test_serde.py b/metadata-ingestion/tests/unit/serde/test_serde.py index 1aa28d0109..8681ca3f88 100644 --- a/metadata-ingestion/tests/unit/serde/test_serde.py +++ b/metadata-ingestion/tests/unit/serde/test_serde.py @@ -1,8 +1,3 @@ -import os -import pytest -import subprocess -import shutil - from gometa.ingestion.run.pipeline import Pipeline import mce_helpers @@ -19,8 +14,8 @@ def test_serde_large(pytestconfig, tmp_path): pipeline = Pipeline.create( { - 'source': {'type': 'file', 'file': {'filename': str(golden_file),},}, - 'sink': {'type': 'file', 'file': {'filename': str(output_file),},}, + 'source': {'type': 'file', 'file': {'filename': str(golden_file)}}, + 'sink': {'type': 'file', 'file': {'filename': str(output_file)}}, } ) pipeline.run() diff --git a/metadata-ingestion/tests/unit/test_allow_deny.py b/metadata-ingestion/tests/unit/test_allow_deny.py index 830897ac56..b628e3214d 100644 --- a/metadata-ingestion/tests/unit/test_allow_deny.py +++ b/metadata-ingestion/tests/unit/test_allow_deny.py @@ -3,14 +3,19 @@ from gometa.configuration.common import AllowDenyPattern def test_allow_all(): pattern = AllowDenyPattern.allow_all() - assert pattern.allowed("foo.table") == True + assert pattern.allowed("foo.table") def test_deny_all(): pattern = AllowDenyPattern(allow=[], deny=[".*"]) - assert pattern.allowed("foo.table") == False + assert not pattern.allowed("foo.table") def test_single_table(): pattern = AllowDenyPattern(allow=["foo.mytable"]) - assert pattern.allowed("foo.mytable") == True + assert pattern.allowed("foo.mytable") + + +def test_default_deny(): + pattern = AllowDenyPattern(allow=["foo.mytable"]) + assert not pattern.allowed("foo.bar") diff --git a/metadata-ingestion/tests/unit/test_kafka_sink.py b/metadata-ingestion/tests/unit/test_kafka_sink.py index 0b77fa67b7..45f0a62b20 100644 --- a/metadata-ingestion/tests/unit/test_kafka_sink.py +++ b/metadata-ingestion/tests/unit/test_kafka_sink.py @@ -3,7 +3,7 @@ from gometa.ingestion.api.sink import WriteCallback, SinkReport import unittest from unittest.mock import patch, MagicMock -from gometa.ingestion.api.common import RecordEnvelope, PipelineContext +from gometa.ingestion.api.common import RecordEnvelope class KafkaSinkTest(unittest.TestCase): diff --git a/metadata-ingestion/tests/unit/test_pipeline.py b/metadata-ingestion/tests/unit/test_pipeline.py index f14f85768b..b629f9fadd 100644 --- a/metadata-ingestion/tests/unit/test_pipeline.py +++ b/metadata-ingestion/tests/unit/test_pipeline.py @@ -9,7 +9,7 @@ class PipelineTest(unittest.TestCase): @patch("gometa.ingestion.sink.console.ConsoleSink.close") def test_configure(self, mock_sink, mock_source): pipeline = Pipeline.create( - {"source": {"type": "kafka", "kafka": {"bootstrap": "localhost:9092"},}, "sink": {"type": "console"},} + {"source": {"type": "kafka", "kafka": {"bootstrap": "localhost:9092"}}, "sink": {"type": "console"}} ) pipeline.run() mock_source.assert_called_once()