Remove faker from Sample-data (#1536)

* Removed Faker as a dependency

* Added ModuleNotFoundError

* Removed faker from requirements.txt

* Removed SampleData plugin from setup.py
This commit is contained in:
Ayush Shah 2021-12-06 23:52:27 +05:30 committed by GitHub
parent db3d10ac64
commit 85f132738b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 1 addition and 61 deletions

View File

@ -4,7 +4,6 @@ expandvars~=0.6.5
requests~=2.25.1
python-dateutil~=2.8.1
SQLAlchemy~=1.4.5
Faker~=8.1.1
commonregex~=1.5.4
setuptools~=57.0.0
PyHive~=0.6.4

View File

@ -108,7 +108,7 @@ plugins: Dict[str, Set[str]] = {
"data-profiler": {"openmetadata-data-profiler"},
"snowflake": {"snowflake-sqlalchemy<=1.2.4"},
"snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"},
"sample-data": {"faker~=8.1.1"},
"sample-entity": {"faker~=8.1.1"},
"superset": {},
"tableau": {"tableau-api-lib==0.1.22"},
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"},

View File

@ -18,7 +18,6 @@ from collections import namedtuple
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List, Union
from faker import Faker
from pydantic import ValidationError
from metadata.config.common import ConfigModel
@ -163,58 +162,6 @@ class SampleTableMetadataGenerator:
return data
class GenerateFakeSampleData:
    """Build placeholder sample rows for a table from its column definitions.

    A value is chosen per column: first by keywords contained in the column
    name and, only when no keyword matches, by the column's ``dataType``.
    """

    # Ordered (keywords, generator) rules matched against the column name.
    # Order matters: the first rule with a keyword contained in the name
    # wins, so "company_name" gets a company and "country_name" a first name.
    _NAME_RULES = (
        (("id",), lambda fake: uuid.uuid4()),
        (("price", "currency"), lambda fake: fake.pricetag()),
        (("barcode",), lambda fake: fake.ean(length=13)),
        (("phone",), lambda fake: fake.phone_number()),
        (("zip",), lambda fake: fake.postcode()),
        (("address",), lambda fake: fake.street_address()),
        (("company",), lambda fake: fake.company()),
        (("region",), lambda fake: fake.street_address()),
        (("name",), lambda fake: fake.first_name()),
        (("city",), lambda fake: fake.city()),
        (("country",), lambda fake: fake.country()),
    )

    # Ordered (type keyword, generator) fallbacks matched against dataType.
    _TYPE_RULES = (
        ("BOOLEAN", lambda fake: fake.pybool()),
        ("NUMERIC", lambda fake: fake.pyint()),
        ("VARCHAR", lambda fake: fake.text(max_nb_chars=20)),
    )

    def __init__(self) -> None:
        pass

    @classmethod
    def _fake_value(cls, column, fake):
        """Return one generated value for *column*, or None if no rule applies."""
        col_name = column["name"]
        for keywords, generate in cls._NAME_RULES:
            if any(keyword in col_name for keyword in keywords):
                return generate(fake)

        # "dataType" is read only after every name rule has missed, so a
        # column matched by name does not need that key at all.
        data_type = column["dataType"]
        if "TIMESTAMP" in data_type or "date" in col_name:
            return fake.unix_time()
        for type_keyword, generate in cls._TYPE_RULES:
            if type_keyword in data_type:
                return generate(fake)
        return None

    @classmethod
    def check_columns(cls, columns, *, num_rows=25, fake=None):
        """Generate sample data for the given column definitions.

        Args:
            columns: iterable of mappings, each with a ``"name"`` key and,
                for columns whose name matches no keyword rule, a
                ``"dataType"`` key.
            num_rows: number of rows to generate (25 by default, as before).
            fake: object supplying the generator methods used by the rules
                (``pricetag``, ``ean``, ``phone_number``, ...). When omitted
                a fresh ``Faker()`` is created; pass a pre-seeded instance to
                get reproducible output.

        Returns:
            ``{"columns": [names...], "rows": [[value, ...], ...]}`` where
            each row holds one value per column, in column order.

        Raises:
            KeyError: if a column lacks ``"name"``, or lacks ``"dataType"``
                when that key has to be consulted.
        """
        if fake is None:
            fake = Faker()
        # Materialize once: the columns are walked once for the header and
        # again for every row, which a one-shot iterable would not survive.
        columns = list(columns)
        col_names = [column["name"] for column in columns]
        rows = [
            [cls._fake_value(column, fake) for column in columns]
            for _ in range(num_rows)
        ]
        return {"columns": col_names, "rows": rows}
class SampleDataSource(Source):
def __init__(
self, config: SampleDataSourceConfig, metadata_config: MetadataServerConfig, ctx
@ -393,10 +340,6 @@ class SampleDataSource(Source):
),
)
for table in self.tables["tables"]:
if not table.get("sampleData"):
table["sampleData"] = GenerateFakeSampleData.check_columns(
table["columns"]
)
table_metadata = Table(**table)
table_and_db = OMetaDatabaseAndTable(table=table_metadata, database=db)
self.status.scanned("table", table_metadata.name.__root__)

View File

@ -40,7 +40,6 @@ from metadata.ingestion.models.table_metadata import Chart, Dashboard
from metadata.ingestion.ometa.client import APIError
from metadata.ingestion.ometa.ometa_api import OpenMetadata
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
from metadata.ingestion.processor.pii import ColumnNameScanner
from metadata.ingestion.source.sql_source import SQLConnectionConfig
from metadata.utils.helpers import snake_to_camel
@ -91,7 +90,6 @@ class SampleEntitySource(Source):
self.config = config
self.metadata_config = metadata_config
self.metadata = OpenMetadata(metadata_config)
self.column_scanner = ColumnNameScanner()
self.service_name = lambda: self.faker.word()
self.service_type = lambda: random.choice(
["BigQuery", "Hive", "MSSQL", "MySQL", "Postgres", "Redshift", "Snowflake"]