import random
import string
from typing import Any, Dict
from unittest.mock import patch

from freezegun import freeze_time
from google.cloud.bigquery.table import TableListItem

from datahub.ingestion.glossary.classifier import (
    ClassificationConfig,
    DynamicTypedClassifierConfig,
)
from datahub.ingestion.glossary.datahub_classifier import DataHubClassifierConfig
from datahub.ingestion.source.bigquery_v2.bigquery_data_reader import BigQueryDataReader
from datahub.ingestion.source.bigquery_v2.bigquery_schema import (
    BigqueryColumn,
    BigqueryDataset,
    BigqueryProject,
    BigQuerySchemaApi,
    BigqueryTable,
)
from datahub.ingestion.source.bigquery_v2.bigquery_schema_gen import (
    BigQuerySchemaGenerator,
)
from tests.test_helpers import mce_helpers
from tests.test_helpers.state_helpers import run_and_get_pipeline

FROZEN_TIME = "2022-02-03 07:00:00"


def random_email():
    return (
        "".join(
            [
                random.choice(string.ascii_lowercase)
                for i in range(random.randint(10, 15))
            ]
        )
        + "@xyz.com"
    )


def recipe(mcp_output_path: str, override: dict = {}) -> dict:
    return {
        "source": {
            "type": "bigquery",
            "config": {
                "project_ids": ["project-id-1"],
                "include_usage_statistics": False,
                "include_table_lineage": False,
                "include_data_platform_instance": True,
                "classification": ClassificationConfig(
                    enabled=True,
                    classifiers=[
                        DynamicTypedClassifierConfig(
                            type="datahub",
                            config=DataHubClassifierConfig(
                                minimum_values_threshold=1,
                            ),
                        )
                    ],
                    max_workers=1,
                ).dict(),
            },
        },
        "sink": {"type": "file", "config": {"filename": mcp_output_path}},
    }


@freeze_time(FROZEN_TIME)
@patch.object(BigQuerySchemaApi, "get_tables_for_dataset")
@patch.object(BigQuerySchemaGenerator, "get_core_table_details")
@patch.object(BigQuerySchemaApi, "get_datasets_for_project_id")
@patch.object(BigQuerySchemaApi, "get_columns_for_dataset")
@patch.object(BigQueryDataReader, "get_sample_data_for_table")
@patch("google.cloud.bigquery.Client")
@patch("google.cloud.datacatalog_v1.PolicyTagManagerClient")
@patch("google.cloud.resourcemanager_v3.ProjectsClient")
def test_bigquery_v2_ingest(
    client,
    policy_tag_manager_client,
    projects_client,
    get_sample_data_for_table,
    get_columns_for_dataset,
    get_datasets_for_project_id,
    get_core_table_details,
    get_tables_for_dataset,
    pytestconfig,
    tmp_path,
):
    test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2"
    mcp_golden_path = f"{test_resources_dir}/bigquery_mcp_golden.json"
    mcp_output_path = "{}/{}".format(tmp_path, "bigquery_mcp_output.json")

    get_datasets_for_project_id.return_value = [
        BigqueryDataset(name="bigquery-dataset-1")
    ]

    table_list_item = TableListItem(
        {"tableReference": {"projectId": "", "datasetId": "", "tableId": ""}}
    )
    table_name = "table-1"
    get_core_table_details.return_value = {table_name: table_list_item}
    get_columns_for_dataset.return_value = {
        table_name: [
            BigqueryColumn(
                name="age",
                ordinal_position=1,
                is_nullable=False,
                field_path="col_1",
                data_type="INT",
                comment="comment",
                is_partition_column=False,
                cluster_column_position=None,
                policy_tags=["Test Policy Tag"],
            ),
            BigqueryColumn(
                name="email",
                ordinal_position=1,
                is_nullable=False,
                field_path="col_2",
                data_type="STRING",
                comment="comment",
                is_partition_column=False,
                cluster_column_position=None,
            ),
        ]
    }
    get_sample_data_for_table.return_value = {
        "age": [random.randint(1, 80) for i in range(20)],
        "email": [random_email() for i in range(20)],
    }

    bigquery_table = BigqueryTable(
        name=table_name,
        comment=None,
        created=None,
        last_altered=None,
        size_in_bytes=None,
        rows_count=None,
    )
    get_tables_for_dataset.return_value = iter([bigquery_table])

    pipeline_config_dict: Dict[str, Any] = recipe(mcp_output_path=mcp_output_path)

    run_and_get_pipeline(pipeline_config_dict)

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=mcp_output_path,
        golden_path=mcp_golden_path,
    )


@freeze_time(FROZEN_TIME)
@patch.object(BigQuerySchemaApi, attribute="get_projects_with_labels")
@patch.object(BigQuerySchemaApi, "get_tables_for_dataset")
@patch.object(BigQuerySchemaGenerator, "get_core_table_details")
@patch.object(BigQuerySchemaApi, "get_datasets_for_project_id")
@patch.object(BigQuerySchemaApi, "get_columns_for_dataset")
@patch.object(BigQueryDataReader, "get_sample_data_for_table")
@patch("google.cloud.bigquery.Client")
@patch("google.cloud.datacatalog_v1.PolicyTagManagerClient")
@patch("google.cloud.resourcemanager_v3.ProjectsClient")
def test_bigquery_v2_project_labels_ingest(
    client,
    policy_tag_manager_client,
    projects_client,
    get_sample_data_for_table,
    get_columns_for_dataset,
    get_datasets_for_project_id,
    get_core_table_details,
    get_tables_for_dataset,
    get_projects_with_labels,
    pytestconfig,
    tmp_path,
):
    test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2"
    mcp_golden_path = f"{test_resources_dir}/bigquery_project_label_mcp_golden.json"
    mcp_output_path = "{}/{}".format(tmp_path, "bigquery_project_label_mcp_output.json")

    get_datasets_for_project_id.return_value = [
        BigqueryDataset(name="bigquery-dataset-1")
    ]

    get_projects_with_labels.return_value = [
        BigqueryProject(id="dev", name="development")
    ]

    table_list_item = TableListItem(
        {"tableReference": {"projectId": "", "datasetId": "", "tableId": ""}}
    )
    table_name = "table-1"
    get_core_table_details.return_value = {table_name: table_list_item}
    get_columns_for_dataset.return_value = {
        table_name: [
            BigqueryColumn(
                name="age",
                ordinal_position=1,
                is_nullable=False,
                field_path="col_1",
                data_type="INT",
                comment="comment",
                is_partition_column=False,
                cluster_column_position=None,
                policy_tags=["Test Policy Tag"],
            ),
            BigqueryColumn(
                name="email",
                ordinal_position=1,
                is_nullable=False,
                field_path="col_2",
                data_type="STRING",
                comment="comment",
                is_partition_column=False,
                cluster_column_position=None,
            ),
        ]
    }
    get_sample_data_for_table.return_value = {
        "age": [random.randint(1, 80) for i in range(20)],
        "email": [random_email() for i in range(20)],
    }

    bigquery_table = BigqueryTable(
        name=table_name,
        comment=None,
        created=None,
        last_altered=None,
        size_in_bytes=None,
        rows_count=None,
    )
    get_tables_for_dataset.return_value = iter([bigquery_table])

    pipeline_config_dict: Dict[str, Any] = recipe(mcp_output_path=mcp_output_path)

    del pipeline_config_dict["source"]["config"]["project_ids"]

    pipeline_config_dict["source"]["config"]["project_labels"] = [
        "environment:development"
    ]

    run_and_get_pipeline(pipeline_config_dict)

    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=mcp_output_path,
        golden_path=mcp_golden_path,
    )