mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-24 22:18:41 +00:00
Remove faker from Sample-data (#1536)
* Removed Faker as a dependency
* Added ModuleNotFoundError
* Removed faker from requirements.txt
* Removed SampleData plugin from setup.py
This commit is contained in:
parent
db3d10ac64
commit
85f132738b
@ -4,7 +4,6 @@ expandvars~=0.6.5
|
||||
requests~=2.25.1
|
||||
python-dateutil~=2.8.1
|
||||
SQLAlchemy~=1.4.5
|
||||
Faker~=8.1.1
|
||||
commonregex~=1.5.4
|
||||
setuptools~=57.0.0
|
||||
PyHive~=0.6.4
|
||||
|
||||
@ -108,7 +108,7 @@ plugins: Dict[str, Set[str]] = {
|
||||
"data-profiler": {"openmetadata-data-profiler"},
|
||||
"snowflake": {"snowflake-sqlalchemy<=1.2.4"},
|
||||
"snowflake-usage": {"snowflake-sqlalchemy<=1.2.4"},
|
||||
"sample-data": {"faker~=8.1.1"},
|
||||
"sample-entity": {"faker~=8.1.1"},
|
||||
"superset": {},
|
||||
"tableau": {"tableau-api-lib==0.1.22"},
|
||||
"vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"},
|
||||
|
||||
@ -18,7 +18,6 @@ from collections import namedtuple
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, Iterable, List, Union
|
||||
|
||||
from faker import Faker
|
||||
from pydantic import ValidationError
|
||||
|
||||
from metadata.config.common import ConfigModel
|
||||
@ -163,58 +162,6 @@ class SampleTableMetadataGenerator:
|
||||
return data
|
||||
|
||||
|
||||
class GenerateFakeSampleData:
    """Produce fake sample rows for a table from its column definitions.

    Values are chosen first by keywords found in the column name and, when no
    keyword matches, by the column's declared ``dataType``.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def _fake_value(fake, column):
        """Return one fake value for ``column`` (``None`` if nothing applies).

        Name keywords are tested as substrings, in the order written below;
        the first match wins (so e.g. ``"city_name"`` is handled by the
        ``"name"`` rule, which precedes ``"city"``).
        """
        col_name = column["name"]
        if "id" in col_name:
            return uuid.uuid4()
        if "price" in col_name or "currency" in col_name:
            return fake.pricetag()
        if "barcode" in col_name:
            return fake.ean(length=13)
        if "phone" in col_name:
            return fake.phone_number()
        if "zip" in col_name:
            return fake.postcode()
        if "address" in col_name:
            return fake.street_address()
        if "company" in col_name:
            return fake.company()
        if "region" in col_name:
            return fake.street_address()
        if "name" in col_name:
            return fake.first_name()
        if "city" in col_name:
            return fake.city()
        if "country" in col_name:
            return fake.country()

        # No name keyword matched: fall back to the declared data type.
        data_type = column["dataType"]
        if "TIMESTAMP" in data_type or "date" in col_name:
            return fake.unix_time()
        if "BOOLEAN" in data_type:
            return fake.pybool()
        if "NUMERIC" in data_type:
            return fake.pyint()
        if "VARCHAR" in data_type:
            return fake.text(max_nb_chars=20)
        return None

    @classmethod
    def check_columns(cls, columns, num_rows: int = 25):
        """Generate fake sample data for the given columns.

        Args:
            columns: sequence of column mappings; each must provide ``"name"``
                and, when no name keyword matches, ``"dataType"``. It is
                iterated more than once, so it must not be a one-shot iterator.
            num_rows: number of rows to generate (defaults to 25, the value
                that used to be hard-coded).

        Returns:
            A dict ``{"columns": [<column names>], "rows": [[...], ...]}``
            where every row holds one value per column, in column order.
        """
        fake = Faker()
        column_names = [column["name"] for column in columns]
        rows = [
            [cls._fake_value(fake, column) for column in columns]
            for _ in range(num_rows)
        ]
        return {"columns": column_names, "rows": rows}
|
||||
|
||||
|
||||
class SampleDataSource(Source):
|
||||
def __init__(
|
||||
self, config: SampleDataSourceConfig, metadata_config: MetadataServerConfig, ctx
|
||||
@ -393,10 +340,6 @@ class SampleDataSource(Source):
|
||||
),
|
||||
)
|
||||
for table in self.tables["tables"]:
|
||||
if not table.get("sampleData"):
|
||||
table["sampleData"] = GenerateFakeSampleData.check_columns(
|
||||
table["columns"]
|
||||
)
|
||||
table_metadata = Table(**table)
|
||||
table_and_db = OMetaDatabaseAndTable(table=table_metadata, database=db)
|
||||
self.status.scanned("table", table_metadata.name.__root__)
|
||||
|
||||
@ -40,7 +40,6 @@ from metadata.ingestion.models.table_metadata import Chart, Dashboard
|
||||
from metadata.ingestion.ometa.client import APIError
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
from metadata.ingestion.ometa.openmetadata_rest import MetadataServerConfig
|
||||
from metadata.ingestion.processor.pii import ColumnNameScanner
|
||||
from metadata.ingestion.source.sql_source import SQLConnectionConfig
|
||||
from metadata.utils.helpers import snake_to_camel
|
||||
|
||||
@ -91,7 +90,6 @@ class SampleEntitySource(Source):
|
||||
self.config = config
|
||||
self.metadata_config = metadata_config
|
||||
self.metadata = OpenMetadata(metadata_config)
|
||||
self.column_scanner = ColumnNameScanner()
|
||||
self.service_name = lambda: self.faker.word()
|
||||
self.service_type = lambda: random.choice(
|
||||
["BigQuery", "Hive", "MSSQL", "MySQL", "Postgres", "Redshift", "Snowflake"]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user