From 85f132738bdc23c2aee3435f7742b7a8c007bdea Mon Sep 17 00:00:00 2001 From: Ayush Shah Date: Mon, 6 Dec 2021 23:52:27 +0530 Subject: [PATCH] Remove faker from Sample-data (#1536) * Removed Faker as a dependency * Added ModuleNotFoundError * Removed faker from requirements.txt * Removed SampleData plugin from setup.py --- ingestion/requirements.txt | 1 - ingestion/setup.py | 2 +- .../metadata/ingestion/source/sample_data.py | 57 ------------------- .../ingestion/source/sample_entity.py | 2 - 4 files changed, 1 insertion(+), 61 deletions(-) diff --git a/ingestion/requirements.txt b/ingestion/requirements.txt index 4c4c0df1b95..873fbae5d55 100644 --- a/ingestion/requirements.txt +++ b/ingestion/requirements.txt @@ -4,7 +4,6 @@ expandvars~=0.6.5 requests~=2.25.1 python-dateutil~=2.8.1 SQLAlchemy~=1.4.5 -Faker~=8.1.1 commonregex~=1.5.4 setuptools~=57.0.0 PyHive~=0.6.4 diff --git a/ingestion/setup.py b/ingestion/setup.py index 8785799052e..c63597fbabc 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -108,7 +108,7 @@ plugins: Dict[str, Set[str]] = { "data-profiler": {"openmetadata-data-profiler"}, "snowflake": {"snowflake-sqlalchemy<=1.2.4"}, "snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"}, - "sample-data": {"faker~=8.1.1"}, + "sample-entity": {"faker~=8.1.1"}, "superset": {}, "tableau": {"tableau-api-lib==0.1.22"}, "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"}, diff --git a/ingestion/src/metadata/ingestion/source/sample_data.py b/ingestion/src/metadata/ingestion/source/sample_data.py index ab9b02ee789..fe6d4313c8b 100644 --- a/ingestion/src/metadata/ingestion/source/sample_data.py +++ b/ingestion/src/metadata/ingestion/source/sample_data.py @@ -18,7 +18,6 @@ from collections import namedtuple from dataclasses import dataclass, field from typing import Any, Dict, Iterable, List, Union -from faker import Faker from pydantic import ValidationError from metadata.config.common import ConfigModel @@ -163,58 +162,6 @@ class SampleTableMetadataGenerator: return data -class GenerateFakeSampleData: - def __init__(self) -> None: - pass - - @classmethod - def check_columns(self, columns): - fake = Faker() - colData = [] - colList = [column["name"] for column in columns] - for i in range(25): - row = [] - for column in columns: - col_name = column["name"] - value = None - if "id" in col_name: - value = uuid.uuid4() - elif "price" in col_name or "currency" in col_name: - value = fake.pricetag() - elif "barcode" in col_name: - value = fake.ean(length=13) - elif "phone" in col_name: - value = fake.phone_number() - elif "zip" in col_name: - value = fake.postcode() - elif "address" in col_name: - value = fake.street_address() - elif "company" in col_name: - value = fake.company() - elif "region" in col_name: - value = fake.street_address() - elif "name" in col_name: - value = fake.first_name() - elif "city" in col_name: - value = fake.city() - elif "country" in col_name: - value = fake.country() - if value is None: - if "TIMESTAMP" in column["dataType"] or "date" in col_name: - value = fake.unix_time() - elif "BOOLEAN" in column["dataType"]: - value = fake.pybool() - elif "NUMERIC" in column["dataType"]: - value = fake.pyint() - elif "VARCHAR" in column["dataType"]: - value = fake.text(max_nb_chars=20) - else: - value = None - row.append(value) - colData.append(row) - return {"columns": colList, "rows": colData} - - class SampleDataSource(Source): def __init__( self, config: SampleDataSourceConfig, metadata_config: MetadataServerConfig, ctx @@ -393,10 +340,6 @@ class SampleDataSource(Source): ), ) for table in self.tables["tables"]: - if not table.get("sampleData"): - table["sampleData"] = GenerateFakeSampleData.check_columns( - table["columns"] - ) table_metadata = Table(**table) table_and_db = OMetaDatabaseAndTable(table=table_metadata, database=db) self.status.scanned("table", table_metadata.name.__root__) diff --git a/ingestion/src/metadata/ingestion/source/sample_entity.py b/ingestion/src/metadata/ingestion/source/sample_entity.py index 827103757b4..198f01148d3 100644 --- a/ingestion/src/metadata/ingestion/source/sample_entity.py +++ b/ingestion/src/metadata/ingestion/source/sample_entity.py @@ -40,7 +40,6 @@ from metadata.ingestion.models.table_metadata import Chart, Dashboard from metadata.ingestion.ometa.client import APIError from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig -from metadata.ingestion.processor.pii import ColumnNameScanner from metadata.ingestion.source.sql_source import SQLConnectionConfig from metadata.utils.helpers import snake_to_camel @@ -91,7 +90,6 @@ class SampleEntitySource(Source): self.config = config self.metadata_config = metadata_config self.metadata = OpenMetadata(metadata_config) - self.column_scanner = ColumnNameScanner() self.service_name = lambda: self.faker.word() self.service_type = lambda: random.choice( ["BigQuery", "Hive", "MSSQL", "MySQL", "Postgres", "Redshift", "Snowflake"]