mirror of https://github.com/datahub-project/datahub.git
synced 2025-08-21 07:38:13 +00:00

test: add smoke test (#2464)

This commit is contained in:
parent df9e7c594f
commit 201ffd4979

36  .github/workflows/build-and-test.yml (vendored)
@@ -4,14 +4,14 @@ on:
     branches:
       - master
     paths-ignore:
-      - 'docs/**'
-      - '**.md'
+      - "docs/**"
+      - "**.md"
   pull_request:
     branches:
       - master
     paths-ignore:
-      - 'docs/**'
-      - '**.md'
+      - "docs/**"
+      - "**.md"
   release:
     types: [published, edited]

@@ -26,7 +26,7 @@ jobs:
           java-version: 1.8
       - uses: actions/setup-python@v2
         with:
-          python-version: '3.6'
+          python-version: "3.6"
       - name: Gradle build (and test)
         run: ./gradlew build
       - name: Python ingest framework tests
@@ -38,3 +38,29 @@ jobs:
           job-status: ${{ job.status }}
           slack-bot-token: ${{ secrets.SLACK_BOT_TOKEN }}
           channel: github-activities
+
+  smoke-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK 1.8
+        uses: actions/setup-java@v1
+        with:
+          java-version: 1.8
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.6"
+      - name: Gradle build
+        run: ./gradlew build -x check -x docs-website:build
+      - name: Smoke test
+        run: |
+          ./docker/dev.sh -d
+          sleep 30
+          ./smoke-test/smoke.sh
+      - name: Slack failure notification
+        if: failure() && github.event_name == 'push'
+        uses: kpritam/slack-job-status-action@v1
+        with:
+          job-status: ${{ job.status }}
+          slack-bot-token: ${{ secrets.SLACK_BOT_TOKEN }}
+          channel: github-activities

@@ -43,5 +43,5 @@ ENV JAVA_OPTS=" \
     -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf \
     -Dlogback.configurationFile=datahub-frontend/conf/logback.xml \
     -Dlogback.debug=true \
-    -Dpidfile.path=/datahub-frontend/play.pid"
+    -Dpidfile.path=/dev/null"
 CMD ["datahub-frontend/bin/playBinary"]
@@ -28,9 +28,8 @@ REQUIRED_CONTAINERS = [
     # "kafka-rest-proxy",
 ]

-MIN_MEMORY_NEEDED = 8  # GB
-# docker seems to under-report memory allocated, adding a bit of buffer to account for it
-MEMORY_TOLERANCE = 0.2  # GB
+# Docker seems to under-report memory allocated, so we also need a bit of buffer to account for it.
+MIN_MEMORY_NEEDED = 6.75  # GB


 @contextmanager
@@ -59,7 +58,7 @@ def check_local_docker_containers() -> List[str]:

     # Check total memory.
     total_mem_configured = int(client.info()["MemTotal"])
-    if memory_in_gb(total_mem_configured) + MEMORY_TOLERANCE < MIN_MEMORY_NEEDED:
+    if memory_in_gb(total_mem_configured) < MIN_MEMORY_NEEDED:
         issues.append(
             f"Total Docker memory configured {memory_in_gb(total_mem_configured):.2f}GB is below the minimum threshold {MIN_MEMORY_NEEDED}GB"
         )
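
For reference, a minimal standalone sketch of the check this hunk modifies, assuming the docker Python SDK and a binary bytes-to-GB conversion for the memory_in_gb helper referenced above (the helper's actual definition is not shown in this diff):

import docker

def memory_in_gb(mem_bytes: int) -> float:
    # Assumed conversion; the module's real helper may differ.
    return mem_bytes / (1024 * 1024 * 1024)

client = docker.from_env()
total_mem_configured = int(client.info()["MemTotal"])
if memory_in_gb(total_mem_configured) < 6.75:
    print(f"Total Docker memory configured {memory_in_gb(total_mem_configured):.2f}GB is below the minimum threshold 6.75GB")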

132  smoke-test/.gitignore (vendored, Normal file)

@@ -0,0 +1,132 @@
.envrc
.vscode/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

3  smoke-test/requirements.txt (Normal file)

@@ -0,0 +1,3 @@
pytest>=6.2
pytest-dependency>=0.5.1
-e ../metadata-ingestion[datahub-rest,datahub-kafka]
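
pytest-dependency is pinned because test_e2e.py below chains its tests with @pytest.mark.dependency. A minimal sketch of that pattern, with hypothetical test names:

import pytest

@pytest.mark.dependency()
def test_upstream():
    assert True

@pytest.mark.dependency(depends=["test_upstream"])
def test_downstream():
    # If test_upstream fails, this test is skipped rather than failed.
    assert True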

384  smoke-test/sample_bq_data.json (Normal file)

@@ -0,0 +1,384 @@
[
  {
    "auditHeader": null,
    "proposedSnapshot": {
      "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
        "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times,PROD)",
        "aspects": [
          {
            "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
              "description": "This dataset shows hourly average border crossing duration for US-Canada and US-Mexico borders starting from 2020-03-16. Hourly trip volume is compared to average trip volume calculated between Feb.1st and Mar.15th, 2020 as a control group in each country.",
              "uri": null,
              "tags": [],
              "customProperties": {}
            }
          },
          {
            "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
              "schemaName": "bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times",
              "platform": "urn:li:dataPlatform:bigquery",
              "version": 0,
              "created": {
                "time": 1616104630716,
                "actor": "urn:li:corpuser:etl",
                "impersonator": null
              },
              "lastModified": {
                "time": 1616104630716,
                "actor": "urn:li:corpuser:etl",
                "impersonator": null
              },
              "deleted": null,
              "dataset": null,
              "cluster": null,
              "hash": "",
              "platformSchema": {
                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
                  "tableSchema": ""
                }
              },
              "fields": [
                {
                  "fieldPath": "border_id",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Unique ID of the border crossing",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "port_name",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Port Name in Canada or Mexico",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "port_name_us",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Port Name in the US",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "trip_direction",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Direction of the trip",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "hour_local",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Local hour of the data",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } },
                  "nativeDataType": "Integer()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "date_local",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Local date of the data",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.DateType": {} } },
                  "nativeDataType": "DATE()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "day_type",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Weekday/Weekend indicator",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "date_utc",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "UTC date of the data",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.DateType": {} } },
                  "nativeDataType": "DATE()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "hour_utc",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "UTC hour of the data",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } },
                  "nativeDataType": "Integer()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "avg_crossing_duration",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Average border crossing times (in minutes)",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } },
                  "nativeDataType": "Float()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "aggregation_method",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Daily Average: the average is taken for the current LocalHour; Weekly Average: the average is taken for the full week prior to the current LocalDate; Monthly Average: the average is taken for the full month prior to the current LocalDate; Yearly Average: the average is taken for the full year prior to the LocalDate",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "percent_of_baseline_trip_volume",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Proportion of trips in this time interval as compared to Avg number of trips on the same hour of day in baseline period i.e 1st February 2020 - 15th March 2020. Data is only available for daily aggregation level with valid baseline number.",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } },
                  "nativeDataType": "Float()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "border_zone",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Polygon of the Port in Canada or Mexico",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} } },
                  "nativeDataType": "NullType()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "province_code",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "ISO 3166-2 Country-Province code in Canada or Mexico",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "border_zone_us",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Polygon of the Port in the US",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NullType": {} } },
                  "nativeDataType": "NullType()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "state_code_us",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "ISO 3166-2 Country-State code for US",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "border_latitude",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Latitude of the border",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } },
                  "nativeDataType": "Float()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "border_longitude",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Longitude of the border",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.NumberType": {} } },
                  "nativeDataType": "Float()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "border_geohash",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Geohash of the Border Station with level of 7",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                },
                {
                  "fieldPath": "version",
                  "jsonPath": null,
                  "nullable": true,
                  "description": "Version of the table",
                  "type": { "type": { "com.linkedin.pegasus2avro.schema.StringType": {} } },
                  "nativeDataType": "String()",
                  "recursive": false,
                  "globalTags": null
                }
              ],
              "primaryKeys": null,
              "foreignKeysSpecs": null
            }
          }
        ]
      }
    },
    "proposedDelta": null
  },
  {
    "auditHeader": null,
    "proposedSnapshot": {
      "com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot": {
        "urn": "urn:li:corpuser:Geotab",
        "aspects": [
          {
            "com.linkedin.pegasus2avro.identity.CorpUserInfo": {
              "active": true,
              "displayName": "Geotab",
              "email": "Geotab-demo@example.com",
              "title": null,
              "managerUrn": null,
              "departmentId": null,
              "departmentName": null,
              "firstName": null,
              "lastName": null,
              "fullName": "Geotab",
              "countryCode": null
            }
          }
        ]
      }
    },
    "proposedDelta": null
  },
  {
    "auditHeader": null,
    "proposedSnapshot": {
      "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
        "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times,PROD)",
        "aspects": [
          {
            "com.linkedin.pegasus2avro.common.Ownership": {
              "owners": [
                {
                  "owner": "urn:li:corpuser:Geotab",
                  "type": "DATAOWNER",
                  "source": null
                }
              ],
              "lastModified": {
                "time": 1616107219522,
                "actor": "urn:li:corpuser:datahub",
                "impersonator": null
              }
            }
          }
        ]
      }
    },
    "proposedDelta": null
  }
]
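
A quick sanity check that the sample file parses and contains the three snapshots above (a sketch, assuming it is run from the smoke-test directory):

import json

with open("sample_bq_data.json") as f:
    mces = json.load(f)

# One dataset schema snapshot, one corp-user snapshot, one ownership snapshot.
assert len(mces) == 3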

26  smoke-test/smoke.sh (Executable file)

@@ -0,0 +1,26 @@
#!/bin/bash

# Runs a basic e2e test. It is not meant to be fully comprehensive,
# but rather should catch obvious bugs before they make it into prod.
#
# Script assumptions:
#   - The gradle build has already been run.
#   - Python 3.6+ is installed.
#   - The metadata-ingestion codegen script has been run.
#   - A full DataHub setup is running on localhost with standard ports.
#     The easiest way to do this is by using the quickstart or dev
#     quickstart scripts.

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd "$DIR"

set -euxo pipefail

python3 -m venv venv
source venv/bin/activate
pip install --upgrade pip wheel setuptools
pip install -r requirements.txt

(cd ../metadata-ingestion && ./scripts/codegen.sh)

pytest -vv

325  smoke-test/test_e2e.py (Normal file)

@@ -0,0 +1,325 @@
import time
import pytest
import requests

from datahub.ingestion.run.pipeline import Pipeline
from datahub.check.docker import check_local_docker_containers

GMS_ENDPOINT = "http://localhost:8080"
FRONTEND_ENDPOINT = "http://localhost:9002"
KAFKA_BROKER = "localhost:9092"

bootstrap_sample_data = "../metadata-ingestion/examples/mce_files/bootstrap_mce.json"
bq_sample_data = "./sample_bq_data.json"
restli_default_headers = {
    "X-RestLi-Protocol-Version": "2.0.0",
}
kafka_post_ingestion_wait_sec = 60

# Worst case, healthchecks are polled for 20 * 15s = 5 minutes before giving up.
healthcheck_wait_retries = 20
healthcheck_wait_interval_sec = 15


@pytest.fixture(scope="session")
def wait_for_healthchecks():
    tries = 0
    while tries < healthcheck_wait_retries:
        if tries > 0:
            time.sleep(healthcheck_wait_interval_sec)
        tries += 1

        issues = check_local_docker_containers()
        if not issues:
            print(f"finished waiting for healthchecks after {tries} tries")
            yield
            return

    issues_str = '\n'.join(f"- {issue}" for issue in issues)
    raise RuntimeError(f"retry limit exceeded while waiting for docker healthchecks\n{issues_str}")


@pytest.mark.dependency()
def test_healthchecks(wait_for_healthchecks):
    # Call to wait_for_healthchecks fixture will do the actual functionality.
    pass


@pytest.mark.dependency(depends=["test_healthchecks"])
def test_ingestion_via_rest(wait_for_healthchecks):
    pipeline = Pipeline.create(
        {
            "source": {
                "type": "file",
                "config": {"filename": bootstrap_sample_data},
            },
            "sink": {
                "type": "datahub-rest",
                "config": {"server": GMS_ENDPOINT},
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()


@pytest.mark.dependency(depends=["test_healthchecks"])
def test_ingestion_via_kafka(wait_for_healthchecks):
    pipeline = Pipeline.create(
        {
            "source": {
                "type": "file",
                "config": {"filename": bq_sample_data},
            },
            "sink": {
                "type": "datahub-kafka",
                "config": {
                    "connection": {
                        "bootstrap": KAFKA_BROKER,
                    }
                },
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    # Since Kafka emission is asynchronous, we must wait a little bit so that
    # the changes are actually processed.
    time.sleep(kafka_post_ingestion_wait_sec)


@pytest.mark.dependency(depends=["test_ingestion_via_rest", "test_ingestion_via_kafka"])
def test_run_ingestion(wait_for_healthchecks):
    # Dummy test so that future ones can just depend on this one.
    pass


@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_list_data_platforms():
    response = requests.get(
        f"{GMS_ENDPOINT}/dataPlatforms",
        headers={
            **restli_default_headers,
            "X-RestLi-Method": "get_all",
        },
    )
    response.raise_for_status()
    data = response.json()

    assert len(data["elements"]) > 10


@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_all_users():
    response = requests.get(
        f"{GMS_ENDPOINT}/corpUsers",
        headers={
            **restli_default_headers,
            "X-RestLi-Method": "get_all",
        },
    )
    response.raise_for_status()
    data = response.json()

    assert len(data["elements"]) >= 3


@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_user():
    username = "jdoe"
    response = requests.get(
        f"{GMS_ENDPOINT}/corpUsers/($params:(),name:{username})",
        headers={
            **restli_default_headers,
        },
    )
    response.raise_for_status()
    data = response.json()

    assert data["username"] == username
    assert data["info"]["displayName"]
    assert data["info"]["email"]


@pytest.mark.parametrize(
    "platform,dataset_name,env",
    [
        (
            # This one tests the bootstrap sample data.
            "urn:li:dataPlatform:kafka",
            "SampleKafkaDataset",
            "PROD",
        ),
        (
            # This one tests BigQuery ingestion.
            "urn:li:dataPlatform:bigquery",
            "bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times",
            "PROD",
        ),
    ],
)
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_get_dataset(platform, dataset_name, env):
    urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    response = requests.get(
        f"{GMS_ENDPOINT}/datasets/($params:(),name:{dataset_name},origin:{env},platform:{requests.utils.quote(platform)})",
        headers={
            **restli_default_headers,
            "X-RestLi-Method": "get",
        },
    )
    response.raise_for_status()
    data = response.json()

    assert data["urn"] == urn
    assert data["name"] == dataset_name
    assert data["platform"] == platform
    assert len(data["schemaMetadata"]["fields"]) >= 2


@pytest.mark.parametrize(
    "query,min_expected_results",
    [
        ("covid", 1),
        ("sample", 3),
    ],
)
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_gms_search_dataset(query, min_expected_results):
    response = requests.get(
        f"{GMS_ENDPOINT}/datasets?q=search&input={query}",
        headers={
            **restli_default_headers,
            "X-RestLi-Method": "finder",
        },
    )
    response.raise_for_status()
    data = response.json()

    assert len(data["elements"]) >= min_expected_results
    assert data["paging"]["total"] >= min_expected_results
    assert data["elements"][0]["urn"]


@pytest.fixture(scope="session")
def frontend_session(wait_for_healthchecks):
    session = requests.Session()

    headers = {
        "Content-Type": "application/json",
    }
    data = '{"username":"datahub", "password":"datahub"}'
    response = session.post(
        f"{FRONTEND_ENDPOINT}/authenticate", headers=headers, data=data
    )
    response.raise_for_status()

    yield session


@pytest.mark.dependency(depends=["test_healthchecks"])
def test_frontend_auth(frontend_session):
    pass


@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_frontend_browse_datasets(frontend_session):
    response = frontend_session.get(
        f"{FRONTEND_ENDPOINT}/api/v2/browse?type=dataset&path=/prod"
    )
    response.raise_for_status()
    data = response.json()

    assert data["metadata"]["totalNumEntities"] >= 4
    assert len(data["metadata"]["groups"]) >= 4
    assert len(data["metadata"]["groups"]) <= 8


@pytest.mark.parametrize(
    "query,min_expected_results",
    [
        ("covid", 1),
        ("sample", 3),
    ],
)
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_frontend_search_datasets(frontend_session, query, min_expected_results):
    response = frontend_session.get(
        f"{FRONTEND_ENDPOINT}/api/v2/search?type=dataset&input={query}"
    )
    response.raise_for_status()
    data = response.json()

    assert len(data["elements"]) >= min_expected_results


@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_frontend_list_users(frontend_session):
    response = frontend_session.get(f"{FRONTEND_ENDPOINT}/api/v1/party/entities")
    response.raise_for_status()
    data = response.json()

    assert data["status"] == "ok"
    assert len(data["userEntities"]) >= 3


@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_frontend_user_info(frontend_session):
    response = frontend_session.get(f"{FRONTEND_ENDPOINT}/api/v1/user/me")
    response.raise_for_status()
    data = response.json()

    assert data["status"] == "ok"
    assert data["user"]["userName"] == "datahub"
    assert data["user"]["name"]
    assert data["user"]["email"]


@pytest.mark.parametrize(
    "platform,dataset_name,env",
    [
        (
            # This one tests the bootstrap sample data.
            "urn:li:dataPlatform:kafka",
            "SampleKafkaDataset",
            "PROD",
        ),
        (
            # This one tests BigQuery ingestion.
            "urn:li:dataPlatform:bigquery",
            "bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times",
            "PROD",
        ),
    ],
)
@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"])
def test_frontend_datasets(frontend_session, platform, dataset_name, env):
    urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    # Basic dataset info.
    response = frontend_session.get(f"{FRONTEND_ENDPOINT}/api/v2/datasets/{urn}")
    response.raise_for_status()
    data = response.json()

    assert data["nativeName"] == dataset_name
    assert data["fabric"] == env
    assert data["uri"] == urn

    # Schema info.
    response = frontend_session.get(f"{FRONTEND_ENDPOINT}/api/v2/datasets/{urn}/schema")
    response.raise_for_status()
    data = response.json()

    assert len(data["schema"]["columns"]) >= 2

    # Ownership info.
    response = frontend_session.get(f"{FRONTEND_ENDPOINT}/api/v2/datasets/{urn}/owners")
    response.raise_for_status()
    data = response.json()

    assert len(data["owners"]) >= 1
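
For local iteration the suite can also be run directly, skipping smoke.sh's venv setup (a sketch; assumes the requirements above are already installed and a DataHub stack is up):

# run_smoke.py -- hypothetical helper, not part of this commit.
import sys

import pytest

sys.exit(pytest.main(["-vv", "test_e2e.py"]))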