Mirror of https://github.com/open-metadata/OpenMetadata.git
commit 69729ac8eb (parent 905201548d)
AWS client: register Athena as a service and expose a dedicated client helper.

```diff
@@ -33,6 +33,7 @@ class AWSServices(Enum):
     SAGEMAKER = "sagemaker"
     KINESIS = "kinesis"
     QUICKSIGHT = "quicksight"
+    ATHENA = "athena"


 class AWSAssumeRoleException(Exception):
```
```diff
@@ -179,3 +180,6 @@ class AWSClient:

     def get_quicksight_client(self):
         return self.get_client(AWSServices.QUICKSIGHT.value)
+
+    def get_athena_client(self):
+        return self.get_client(AWSServices.ATHENA.value)
```
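For context, a hedged sketch of how this helper is consumed: the object returned by `get_athena_client` is a boto3 Athena client, so the paginator used later in this diff (`list_query_executions`) is the standard boto3 one. The `connection` variable below is a placeholder for an Athena connection object, not something defined in this change.

```python
# Hedged sketch (not part of this diff): exercising the new Athena client helper.
# `connection` is assumed to be an AthenaConnection with a populated awsConfig block.
from metadata.clients.aws_client import AWSClient

athena_client = AWSClient(connection.awsConfig).get_athena_client()

# The client is a boto3 "athena" client, so the standard paginator applies;
# each page carries up to ~50 QueryExecutionIds (the limit mirrored by
# ATHENA_QUERY_PAGINATOR_LIMIT later in this diff).
paginator = athena_client.get_paginator("list_query_executions")
for page in paginator.paginate():
    print(page["QueryExecutionIds"])
```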
New file — example lineage workflow YAML for Athena (`@@ -0,0 +1,16 @@`):

```yaml
source:
  type: athena-lineage
  serviceName: local_athena
  sourceConfig:
    config:
      type: DatabaseLineage
      queryLogDuration: 1
      resultLimit: 10000
sink:
  type: metadata-rest
  config: {}
workflowConfig:
  loggerLevel: DEBUG
  openMetadataServerConfig:
    hostPort: http://localhost:8585/api
    authProvider: no-auth
```
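A hedged sketch of how such an example workflow is typically run programmatically; the `Workflow` import and its `create`/`execute` helpers follow the pattern used in the Airflow DAG snippets referenced by the connector docs later in this PR, so treat the exact API as an assumption rather than something this diff defines.

```python
# Hedged sketch (assumption: the Workflow API matches the Airflow DAG examples
# referenced in the connector docs touched by this PR).
import yaml
from metadata.ingestion.api.workflow import Workflow


def run_athena_lineage(yaml_path: str = "athena_lineage.yaml") -> None:
    # Load the example workflow definition shown above
    with open(yaml_path, "r", encoding="utf-8") as config_file:
        workflow_config = yaml.safe_load(config_file)

    # Build and run the ingestion workflow, failing loudly on errors
    workflow = Workflow.create(workflow_config)
    workflow.execute()
    workflow.raise_from_status()
    workflow.print_status()
    workflow.stop()
```

Alternatively, the connector docs added later in this PR run the same kind of YAML through `metadata ingest -c <path-to-yaml>`.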
New file — `ingestion/src/metadata/examples/workflows/athena_usage.yaml` (23 lines):

```yaml
source:
  type: athena-usage
  serviceName: local_athena
  sourceConfig:
    config:
      type: DatabaseUsage
      queryLogDuration: 1
      resultLimit: 1000
processor:
  type: query-parser
  config: {}
stage:
  type: table-usage
  config:
    filename: /tmp/athena_usage
bulkSink:
  type: metadata-usage
  config:
    filename: /tmp/athena_usage
workflowConfig:
  openMetadataServerConfig:
    hostPort: http://localhost:8585/api
    authProvider: no-auth
```
New file — Athena lineage source (`@@ -0,0 +1,44 @@`):

```python
#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
Athena lineage module
"""
from typing import Iterable, Optional

from metadata.generated.schema.type.tableQuery import TableQuery
from metadata.ingestion.source.database.athena.query_parser import (
    AthenaQueryParserSource,
)
from metadata.ingestion.source.database.lineage_source import LineageSource
from metadata.utils.logger import ingestion_logger

logger = ingestion_logger()


class AthenaLineageSource(AthenaQueryParserSource, LineageSource):
    """
    Athena Lineage Source
    """

    def yield_table_query(self) -> Optional[Iterable[TableQuery]]:
        """
        Method to yield TableQueries
        """
        for query_list in self.get_queries() or []:
            for query in query_list.QueryExecutions:
                if (
                    query.Status.SubmissionDateTime.date() >= self.start.date()
                    and self.is_not_dbt_or_om_query(query.Query)
                ):
                    yield TableQuery(
                        query=query.Query,
                        serviceName=self.config.serviceName,
                    )
```
New file — Athena models (`@@ -0,0 +1,77 @@`):

```python
#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""
Athena Models
"""
from datetime import datetime
from typing import List

from pydantic import BaseModel


class QueryExecutionIdsResponse(BaseModel):
    QueryExecutionIds: List[str]


class ResultReuseByAgeConfiguration(BaseModel):
    Enabled: bool


class ResultConfiguration(BaseModel):
    OutputLocation: str


class QueryExecutionContext(BaseModel):
    Database: str
    Catalog: str


class Status(BaseModel):
    State: str
    SubmissionDateTime: datetime
    CompletionDateTime: datetime


class ResultReuseInformation(BaseModel):
    ReusedPreviousResult: bool


class Statistics(BaseModel):
    EngineExecutionTimeInMillis: int
    DataScannedInBytes: int
    TotalExecutionTimeInMillis: int
    QueryQueueTimeInMillis: int
    ServiceProcessingTimeInMillis: int
    ResultReuseInformation: ResultReuseInformation


class EngineVersion(BaseModel):
    SelectedEngineVersion: str
    EffectiveEngineVersion: str


class AthenaQueryExecution(BaseModel):
    QueryExecutionId: str
    Query: str
    StatementType: str
    ResultConfiguration: ResultConfiguration
    ResultReuseConfiguration: dict
    QueryExecutionContext: QueryExecutionContext
    Status: Status
    Statistics: Statistics
    WorkGroup: str
    EngineVersion: EngineVersion
    SubstatementType: str


class AthenaQueryExecutionList(BaseModel):
    QueryExecutions: List[AthenaQueryExecution]
```
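These models are fed with the raw `batch_get_query_execution` response in the query parser below. A hedged example of that parsing step, with made-up field values shaped like the boto3 `BatchGetQueryExecution` output:

```python
# Hedged example (not part of this diff): parsing a BatchGetQueryExecution-shaped
# payload into the models above. All values are illustrative only.
from metadata.ingestion.source.database.athena.models import AthenaQueryExecutionList

sample_response = {
    "QueryExecutions": [
        {
            "QueryExecutionId": "abc-123",
            "Query": "SELECT * FROM sales.orders LIMIT 10",
            "StatementType": "DML",
            "ResultConfiguration": {"OutputLocation": "s3://athena-results/abc-123.csv"},
            "ResultReuseConfiguration": {},
            "QueryExecutionContext": {"Database": "sales", "Catalog": "awsdatacatalog"},
            "Status": {
                "State": "SUCCEEDED",
                "SubmissionDateTime": "2023-01-01T10:30:15+00:00",
                "CompletionDateTime": "2023-01-01T10:30:18+00:00",
            },
            "Statistics": {
                "EngineExecutionTimeInMillis": 2500,
                "DataScannedInBytes": 1048576,
                "TotalExecutionTimeInMillis": 3000,
                "QueryQueueTimeInMillis": 400,
                "ServiceProcessingTimeInMillis": 100,
                "ResultReuseInformation": {"ReusedPreviousResult": False},
            },
            "WorkGroup": "primary",
            "EngineVersion": {
                "SelectedEngineVersion": "AUTO",
                "EffectiveEngineVersion": "Athena engine version 3",
            },
            "SubstatementType": "SELECT",
        }
    ]
}

# Nested models and ISO datetime strings are validated/coerced by pydantic
executions = AthenaQueryExecutionList(**sample_response)
print(executions.QueryExecutions[0].Statistics.TotalExecutionTimeInMillis)  # 3000
```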
New file — Athena query parser base for usage and lineage (`@@ -0,0 +1,85 @@`):

```python
#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
Athena Query parser module
"""

from abc import ABC
from math import ceil

from metadata.clients.aws_client import AWSClient
from metadata.generated.schema.entity.services.connections.database.athenaConnection import (
    AthenaConnection,
)
from metadata.generated.schema.entity.services.connections.metadata.openMetadataConnection import (
    OpenMetadataConnection,
)
from metadata.generated.schema.metadataIngestion.workflow import (
    Source as WorkflowSource,
)
from metadata.ingestion.api.source import InvalidSourceException
from metadata.ingestion.source.database.athena.models import (
    AthenaQueryExecutionList,
    QueryExecutionIdsResponse,
)
from metadata.ingestion.source.database.query_parser_source import QueryParserSource
from metadata.utils.constants import QUERY_WITH_DBT, QUERY_WITH_OM_VERSION
from metadata.utils.logger import ingestion_logger

logger = ingestion_logger()

ATHENA_QUERY_PAGINATOR_LIMIT = 50


class AthenaQueryParserSource(QueryParserSource, ABC):
    """
    Athena base for Usage and Lineage
    """

    filters: str

    def __init__(self, config: WorkflowSource, metadata_config: OpenMetadataConnection):
        super().__init__(config, metadata_config)
        self.client = AWSClient(self.service_connection.awsConfig).get_athena_client()

    @classmethod
    def create(cls, config_dict, metadata_config: OpenMetadataConnection):
        """Create class instance"""
        config: WorkflowSource = WorkflowSource.parse_obj(config_dict)
        connection: AthenaConnection = config.serviceConnection.__root__.config
        if not isinstance(connection, AthenaConnection):
            raise InvalidSourceException(
                f"Expected AthenaConnection, but got {connection}"
            )
        return cls(config, metadata_config)

    def get_queries(self):
        query_limit = ceil(
            self.source_config.resultLimit / ATHENA_QUERY_PAGINATOR_LIMIT
        )
        paginator = self.client.get_paginator("list_query_executions")
        paginator_response = paginator.paginate()
        for response in paginator_response:
            response_obj = QueryExecutionIdsResponse(**response)
            query_details_response = self.client.batch_get_query_execution(
                QueryExecutionIds=response_obj.QueryExecutionIds
            )
            query_details_list = AthenaQueryExecutionList(**query_details_response)
            yield query_details_list
            query_limit -= 1
            if not query_limit:
                break

    def is_not_dbt_or_om_query(self, query_text: str) -> bool:
        return not (
            query_text.startswith(QUERY_WITH_DBT)
            or query_text.startswith(QUERY_WITH_OM_VERSION)
        )
```
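A quick sanity check of the paging budget in `get_queries`: each paginator page carries up to `ATHENA_QUERY_PAGINATOR_LIMIT` (50) execution IDs, so `resultLimit` is converted into a page count with `ceil`. With the example workflow values from this PR:

```python
from math import ceil

ATHENA_QUERY_PAGINATOR_LIMIT = 50

# athena_lineage.yaml sets resultLimit: 10000 -> at most 200 pages are consumed
print(ceil(10000 / ATHENA_QUERY_PAGINATOR_LIMIT))  # 200

# athena_usage.yaml sets resultLimit: 1000 -> at most 20 pages are consumed
print(ceil(1000 / ATHENA_QUERY_PAGINATOR_LIMIT))  # 20
```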
New file — Athena usage source (`@@ -0,0 +1,58 @@`):

```python
#  Copyright 2021 Collate
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#  http://www.apache.org/licenses/LICENSE-2.0
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
"""
Athena usage module
"""
from typing import Iterable, Optional

from metadata.generated.schema.type.tableQuery import TableQueries, TableQuery
from metadata.ingestion.source.database.athena.query_parser import (
    AthenaQueryParserSource,
)
from metadata.ingestion.source.database.usage_source import UsageSource
from metadata.utils.logger import ingestion_logger

logger = ingestion_logger()

QUERY_ABORTED_STATE = "CANCELLED"
DATETIME_SEPARATOR = " "
DATETIME_TIME_SPEC = "seconds"


class AthenaUsageSource(AthenaQueryParserSource, UsageSource):
    """
    Athena Usage Source
    """

    def yield_table_queries(self) -> Optional[Iterable[TableQuery]]:
        """
        Method to yield TableQueries
        """
        for query_list in self.get_queries() or []:
            queries = [
                TableQuery(
                    query=query.Query,
                    startTime=query.Status.SubmissionDateTime.isoformat(
                        DATETIME_SEPARATOR, DATETIME_TIME_SPEC
                    ),
                    endTime=query.Status.SubmissionDateTime.isoformat(
                        DATETIME_SEPARATOR, DATETIME_TIME_SPEC
                    ),
                    analysisDate=query.Status.SubmissionDateTime,
                    serviceName=self.config.serviceName,
                    duration=query.Statistics.TotalExecutionTimeInMillis,
                    aborted=query.Status.State == QUERY_ABORTED_STATE,
                )
                for query in query_list.QueryExecutions
                if query.Status.SubmissionDateTime.date() >= self.start.date()
                and self.is_not_dbt_or_om_query(query.Query)
            ]
            yield TableQueries(queries=queries)
```
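The `DATETIME_SEPARATOR` and `DATETIME_TIME_SPEC` constants only control how `datetime.isoformat` renders the start/end times, for example:

```python
from datetime import datetime

submission = datetime(2023, 1, 1, 10, 30, 15, 123456)

# isoformat(sep, timespec): space separator, truncated to whole seconds
print(submission.isoformat(" ", "seconds"))  # 2023-01-01 10:30:15
```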
Databricks source: the query-comment prefixes now come from the shared constants module instead of local copies.

```diff
@@ -22,12 +22,11 @@ from metadata.generated.schema.entity.services.connections.database.databricksCo
     DatabricksConnection,
 )
 from metadata.ingestion.ometa.client import APIError
+from metadata.utils.constants import QUERY_WITH_DBT, QUERY_WITH_OM_VERSION
 from metadata.utils.helpers import datetime_to_ts
 from metadata.utils.logger import ingestion_logger

 logger = ingestion_logger()
-QUERY_WITH_OM_VERSION = '/* {"app": "OpenMetadata"'
-QUERY_WITH_DBT = '/* {"app": "dbt"'
 API_TIMEOUT = 10

```
Databricks lineage source: the CSV/log-file branch moves to the shared base, so `csv` is no longer needed here.

```diff
@@ -11,7 +11,6 @@
 """
 Databricks lineage module
 """
-import csv
 import traceback
 from datetime import datetime
 from typing import Iterator, Optional
```
```diff
@@ -31,32 +30,7 @@ class DatabricksLineageSource(DatabricksQueryParserSource, LineageSource):
     Databricks Lineage Source
     """

-    def get_table_query(self) -> Optional[Iterator[TableQuery]]:
+    def yield_table_query(self) -> Optional[Iterator[TableQuery]]:
-        """
-        If queryLogFilePath available in config iterate through log file
-        otherwise execute the sql query to fetch TableQuery data.
-
-        This is a simplified version of the UsageSource query parsing.
-        """
-        if self.config.sourceConfig.config.queryLogFilePath:
-
-            with open(
-                self.config.sourceConfig.config.queryLogFilePath, "r", encoding="utf-8"
-            ) as file:
-                for row in csv.DictReader(file):
-                    query_dict = dict(row)
-                    yield TableQuery(
-                        query=query_dict["query_text"],
-                        databaseName=self.get_database_name(query_dict),
-                        serviceName=self.config.serviceName,
-                        databaseSchema=self.get_schema_name(query_dict),
-                    )
-
-        else:
-            logger.info(
-                f"Scanning query logs for {self.start.date()} - {self.end.date()}"
-            )
-            try:
         data = self.client.list_query_history(
             start_date=self.start,
             end_date=self.end,
@@ -75,6 +49,3 @@ class DatabricksLineageSource(DatabricksQueryParserSource, LineageSource):
             except Exception as exc:
                 logger.debug(traceback.format_exc())
                 logger.warning(f"Error processing query_dict {row}: {exc}")
-        except Exception as exc:
-            logger.debug(traceback.format_exc())
-            logger.error(f"Source usage processing error: {exc}")
```
Databricks usage source: same cleanup of the unused `csv` import.

```diff
@@ -11,7 +11,6 @@
 """
 Databricks usage module
 """
-import csv
 import traceback
 from datetime import datetime
 from typing import Iterable, Optional
```
```diff
@@ -31,61 +30,10 @@ class DatabricksUsageSource(DatabricksQueryParserSource, UsageSource):
     Databricks Usage Source
     """

-    def get_table_query(self) -> Iterable[TableQuery]:
+    def yield_table_query(self) -> Optional[Iterable[TableQuery]]:
-        try:
-            if self.config.sourceConfig.config.queryLogFilePath:
-                table_query_list = []
-                with open(
-                    self.config.sourceConfig.config.queryLogFilePath,
-                    "r",
-                    encoding="utf-8",
-                ) as query_log_file:
-
-                    for raw in csv.DictReader(query_log_file):
-                        query_dict = dict(raw)
-
-                        analysis_date = (
-                            datetime.utcnow()
-                            if not query_dict.get("session_start_time")
-                            else datetime.strptime(
-                                query_dict.get("session_start_time"),
-                                "%Y-%m-%d %H:%M:%S+%f",
-                            )
-                        )
-
-                        query_dict["aborted"] = query_dict["sql_state_code"] == "00000"
-                        if "statement" in query_dict["message"]:
-                            query_dict["message"] = query_dict["message"].split(":")[1]
-
-                        table_query_list.append(
-                            TableQuery(
-                                query=query_dict["message"],
-                                userName=query_dict.get("user_name", ""),
-                                startTime=query_dict.get("session_start_time", ""),
-                                endTime=query_dict.get("log_time", ""),
-                                analysisDate=analysis_date,
-                                aborted=self.get_aborted_status(query_dict),
-                                databaseName=self.get_database_name(query_dict),
-                                serviceName=self.config.serviceName,
-                                databaseSchema=self.get_schema_name(query_dict),
-                            )
-                        )
-                yield TableQueries(queries=table_query_list)
-
-            else:
-                yield from self.process_table_query()
-
-        except Exception as err:
-            logger.error(f"Source usage processing error - {err}")
-            logger.debug(traceback.format_exc())
-
-    def process_table_query(self) -> Optional[Iterable[TableQuery]]:
         """
         Method to yield TableQueries
         """
-        try:
         queries = []
         data = self.client.list_query_history(
             start_date=self.start,
@@ -109,9 +57,8 @@ class DatabricksUsageSource(DatabricksQueryParserSource, UsageSource):
                 )
             except Exception as err:
                 logger.debug(traceback.format_exc())
-                logger.error(str(err))
+                logger.warning(
+                    f"Failed to process query {row.get('query_text')} due to: {err}"
+                )

         yield TableQueries(queries=queries)
-        except Exception as err:
-            logger.error(f"Source usage processing error - {err}")
-            logger.debug(traceback.format_exc())
```
Shared `LineageSource` base: query-log reading moves into its own `yield_table_queries_from_logs` method, and the SQL path now uses the source's own `self.engine` instead of building a connection in place.

```diff
@@ -16,13 +16,10 @@
 from abc import ABC
 from typing import Iterable, Iterator, Optional

-from sqlalchemy.engine import Engine
-
 from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
 from metadata.generated.schema.type.tableQuery import TableQuery
 from metadata.ingestion.lineage.models import ConnectionTypeDialectMapper
 from metadata.ingestion.lineage.sql_lineage import get_lineage_by_query
-from metadata.ingestion.source.connections import get_connection
 from metadata.ingestion.source.database.query_parser_source import QueryParserSource
 from metadata.utils.logger import ingestion_logger

@@ -41,14 +38,11 @@ class LineageSource(QueryParserSource, ABC):
     - schema
     """

-    def get_table_query(self) -> Optional[Iterator[TableQuery]]:
+    def yield_table_queries_from_logs(self) -> Optional[Iterator[TableQuery]]:
         """
-        If queryLogFilePath available in config iterate through log file
-        otherwise execute the sql query to fetch TableQuery data.
-
-        This is a simplified version of the UsageSource query parsing.
+        Method to handle the usage from query logs
         """
-        if self.config.sourceConfig.config.queryLogFilePath:
+        try:
             with open(
                 self.config.sourceConfig.config.queryLogFilePath, "r", encoding="utf-8"
             ) as file:
@@ -60,25 +54,31 @@ class LineageSource(QueryParserSource, ABC):
                         serviceName=self.config.serviceName,
                         databaseSchema=self.get_schema_name(query_dict),
                     )
+        except Exception as err:
+            logger.debug(traceback.format_exc())
+            logger.warning(f"Failed to read queries form log file due to: {err}")
+
+    def get_table_query(self) -> Optional[Iterator[TableQuery]]:
+        """
+        If queryLogFilePath available in config iterate through log file
+        otherwise execute the sql query to fetch TableQuery data.
+
+        This is a simplified version of the UsageSource query parsing.
+        """
+        if self.config.sourceConfig.config.queryLogFilePath:
+            yield from self.yield_table_queries_from_logs()
         else:
             logger.info(
                 f"Scanning query logs for {self.start.date()} - {self.end.date()}"
             )
-            try:
-                engine = get_connection(self.service_connection)
-                yield from self.yield_table_query(engine)
-
-            except Exception as exc:
-                logger.debug(traceback.format_exc())
-                logger.error(f"Source usage processing error: {exc}")
-
-    def yield_table_query(self, engine: Engine) -> Iterator[TableQuery]:
+            yield from self.yield_table_query()
+
+    def yield_table_query(self) -> Iterator[TableQuery]:
         """
         Given an engine, iterate over the query results to
         yield a TableQuery with query parsing info
         """
-        with engine.connect() as conn:
+        with self.engine.connect() as conn:
             rows = conn.execute(
                 self.get_sql_statement(
                     start_time=self.start,
```
Shared `UsageSource` base: the same refactor is applied to the usage path.

```diff
@@ -17,10 +17,7 @@ from abc
 from datetime import datetime, timedelta
 from typing import Iterable, Optional

-from sqlalchemy.engine import Engine
-
 from metadata.generated.schema.type.tableQuery import TableQueries, TableQuery
-from metadata.ingestion.source.connections import get_connection
 from metadata.ingestion.source.database.query_parser_source import QueryParserSource
 from metadata.utils.logger import ingestion_logger

@@ -34,12 +31,11 @@ class UsageSource(QueryParserSource, ABC):
     Parse a query log to extract a `TableQuery` object
     """

-    def get_table_query(self) -> Optional[Iterable[TableQuery]]:
+    def yield_table_queries_from_logs(self) -> Optional[Iterable[TableQuery]]:
         """
-        If queryLogFilePath available in config iterate through log file
-        otherwise execute the sql query to fetch TableQuery data
+        Method to handle the usage from query logs
         """
-        if self.config.sourceConfig.config.queryLogFilePath:
+        try:
             query_list = []
             with open(
                 self.config.sourceConfig.config.queryLogFilePath, "r", encoding="utf-8"
@@ -68,12 +64,21 @@ class UsageSource(QueryParserSource, ABC):
                     )
                 )
             yield TableQueries(queries=query_list)
+        except Exception as err:
+            logger.debug(traceback.format_exc())
+            logger.warning(f"Failed to read queries form log file due to: {err}")
+
+    def get_table_query(self) -> Optional[Iterable[TableQuery]]:
+        """
+        If queryLogFilePath available in config iterate through log file
+        otherwise execute the sql query to fetch TableQuery data
+        """
+        if self.config.sourceConfig.config.queryLogFilePath:
+            yield from self.yield_table_queries_from_logs()
         else:
-            engine = get_connection(self.service_connection)
-            yield from self.yield_table_queries(engine)
-
-    def yield_table_queries(self, engine: Engine):
+            yield from self.yield_table_queries()
+
+    def yield_table_queries(self):
         """
         Given an Engine, iterate over the day range and
         query the results
@@ -85,7 +90,7 @@ class UsageSource(QueryParserSource, ABC):
                 f"{(self.start + timedelta(days=days + 1)).date()}"
             )
             try:
-                with engine.connect() as conn:
+                with self.engine.connect() as conn:
                     rows = conn.execute(
                         self.get_sql_statement(
                             start_time=self.start + timedelta(days=days),
```
Usage stage: store user fully-qualified names instead of full `EntityReference` objects.

```diff
@@ -25,7 +25,6 @@ from metadata.generated.schema.entity.services.connections.metadata.openMetadata
     OpenMetadataConnection,
 )
 from metadata.generated.schema.entity.teams.user import User
-from metadata.generated.schema.type.entityReference import EntityReference
 from metadata.generated.schema.type.queryParserData import QueryParserData
 from metadata.generated.schema.type.tableUsageCount import TableUsageCount
 from metadata.ingestion.api.stage import Stage
@@ -86,18 +85,7 @@ class TableUsageStage(Stage[QueryParserData]):
         if username:
             user = self.metadata.get_by_name(entity=User, fqn=username)
             if user:
-                return [
-                    EntityReference(
-                        id=user.id,
-                        type="user",
-                        name=user.name.__root__,
-                        fullyQualifiedName=user.fullyQualifiedName.__root__,
-                        description=user.description,
-                        displayName=user.displayName,
-                        deleted=user.deleted,
-                        href=user.href,
-                    )
-                ]
+                return [user.fullyQualifiedName.__root__]
         return []

     def _add_sql_query(self, record, table):
```
Shared constants: the query-comment prefixes now live in `metadata.utils.constants`.

```diff
@@ -28,3 +28,7 @@ ES_SOURCE_TO_ES_OBJ_ARGS = {
     "useSSL": "use_ssl",
     "verifyCerts": "verify_certs",
 }
+
+QUERY_WITH_OM_VERSION = '/* {"app": "OpenMetadata"'
+
+QUERY_WITH_DBT = '/* {"app": "dbt"'
```
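Centralising these prefixes lets both the Databricks source and the new Athena query parser share the same filter for queries that dbt or OpenMetadata issued themselves. A minimal illustration of that check, mirroring `is_not_dbt_or_om_query` from the Athena query parser above:

```python
QUERY_WITH_OM_VERSION = '/* {"app": "OpenMetadata"'
QUERY_WITH_DBT = '/* {"app": "dbt"'


def is_not_dbt_or_om_query(query_text: str) -> bool:
    # Skip queries tagged with the OpenMetadata or dbt comment header
    return not (
        query_text.startswith(QUERY_WITH_DBT)
        or query_text.startswith(QUERY_WITH_OM_VERSION)
    )


print(is_not_dbt_or_om_query('/* {"app": "dbt", "version": "1.3"} */ SELECT 1'))  # False
print(is_not_dbt_or_om_query("SELECT * FROM sales.orders"))  # True
```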
Athena connector docs — Airflow guide (slug: /connectors/database/athena/airflow). The feature tables now flag Query Usage and Lineage support, and the table of contents gains the new sections:

```diff
@@ -9,7 +9,7 @@ slug: /connectors/database/athena/airflow

 | Stage | Metadata |Query Usage | Data Profiler | Data Quality | Lineage | DBT | Supported Versions |
 |:------:|:------:|:-----------:|:-------------:|:------------:|:-------:|:---:|:------------------:|
-| PROD | ✅ | ❌ | ✅ | ✅ | Partially via Views | ✅ | -- |
+| PROD | ✅ | ✅ (1.0 release onwards) | ✅ | ✅ | ✅ (1.0 release onwards) | ✅ | -- |

 </Table>

@@ -17,7 +17,7 @@ slug: /connectors/database/athena/airflow

 | Lineage | Table-level | Column-level |
 |:------:|:-----------:|:-------------:|
-| Partially via Views | ✅ | ✅ |
+| ✅ (1.0 release onwards) | ✅ | ✅ |

 </Table>

@@ -27,8 +27,10 @@ In this section, we provide guides and references to use the Athena connector.
 Configure and schedule Athena metadata and profiler workflows from the OpenMetadata UI:
 - [Requirements](#requirements)
 - [Metadata Ingestion](#metadata-ingestion)
+- [Query Usage](#query-usage)
 - [Data Profiler](#data-profiler)
 - [dbt Integration](#dbt-integration)
+- [Lineage](#lineage)

 ## Requirements
```

After the existing "Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration, you will be able to extract metadata from different sources." paragraph (`@@ -355,6 +357,83 @@ with DAG(`), the guide gains a Query Usage section:

## Query Usage

To ingest the Query Usage, the `serviceConnection` configuration will remain the same.
However, the `sourceConfig` is now modeled after this JSON Schema.

### 1. Define the YAML Config

This is a sample config for Athena Usage:

```yaml
source:
  type: athena-usage
  serviceName: <service name>
  serviceConnection:
    config:
      type: Athena
      awsConfig:
        awsAccessKeyId: KEY
        awsSecretAccessKey: SECRET
        awsRegion: us-east-2
        # endPointURL: https://athena.us-east-2.amazonaws.com/
        # awsSessionToken: TOKEN
      s3StagingDir: s3 directory for datasource
      workgroup: workgroup name
  sourceConfig:
    config:
      # Number of days to look back
      queryLogDuration: 7
      # This is a directory that will be DELETED after the usage runs
      stageFileLocation: <path to store the stage file>
      # resultLimit: 1000
      # If instead of getting the query logs from the database we want to pass a file with the queries
      # queryLogFilePath: path-to-file
processor:
  type: query-parser
  config: {}
stage:
  type: table-usage
  config:
    filename: /tmp/athena_usage
bulkSink:
  type: metadata-usage
  config:
    filename: /tmp/athena_usage
workflowConfig:
  # loggerLevel: DEBUG # DEBUG, INFO, WARN or ERROR
  openMetadataServerConfig:
    hostPort: <OpenMetadata host and port>
    authProvider: <OpenMetadata auth provider>
```

#### Source Configuration - Service Connection

You can find all the definitions and types for the `serviceConnection` [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/bigQueryConnection.json).
They are the same as metadata ingestion.

#### Source Configuration - Source Config

The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/databaseServiceQueryUsagePipeline.json).

- `queryLogDuration`: Configuration to tune how far we want to look back in query logs to process usage data.
- `resultLimit`: Configuration to set the limit for query logs.

#### Processor, Stage and Bulk Sink

To specify where the staging files will be located.

Note that the location is a directory that will be cleaned at the end of the ingestion.

#### Workflow Configuration

The same as the metadata ingestion.

### 2. Run with the CLI

For the usage workflow creation, the Airflow file will look the same as for the metadata ingestion. Updating the YAML configuration will be enough.

The end of the guide (`@@ -537,3 +616,7 @@ with DAG(`) also gains a Lineage pointer:

```diff
 ## dbt Integration

 You can learn more about how to ingest dbt models' definitions and their lineage [here](connectors/ingestion/workflows/dbt).
+
+## Lineage
+
+You can learn more about how to ingest lineage [here](/connectors/ingestion/workflows/lineage).
```
Athena connector docs — CLI guide (slug: /connectors/database/athena/cli). The same support-matrix updates are applied, and the second table's alignment row is normalised:

```diff
@@ -5,19 +5,20 @@ slug: /connectors/database/athena/cli

 # Run Athena using the metadata CLI

 <Table>

 | Stage | Metadata |Query Usage | Data Profiler | Data Quality | Lineage | DBT | Supported Versions |
 |:------:|:------:|:-----------:|:-------------:|:------------:|:-------:|:---:|:------------------:|
-| PROD | ✅ | ❌ | ✅ | ✅ | Partially via Views | ✅ | -- |
+| PROD | ✅ | ✅ (1.0 release onwards) | ✅ | ✅ | ✅ (1.0 release onwards) | ✅ | -- |

 </Table>

 <Table>

 | Lineage | Table-level | Column-level |
-| :-----------------: | :---------: | :----------: |
-| Partially via Views | ✅ | ✅ |
+|:------:|:-----------:|:-------------:|
+| ✅ (1.0 release onwards) | ✅ | ✅ |

 </Table>

@@ -27,8 +28,10 @@ In this section, we provide guides and references to use the Athena connector.
 Configure and schedule Athena metadata and profiler workflows from the OpenMetadata UI:
 - [Requirements](#requirements)
 - [Metadata Ingestion](#metadata-ingestion)
+- [Query Usage](#query-usage)
 - [Data Profiler](#data-profiler)
 - [dbt Integration](#dbt-integration)
+- [Lineage](#lineage)

 ## Requirements
```

After the `metadata ingest -c <path-to-yaml>` instructions (`@@ -308,6 +311,84 @@`), the CLI guide gains the same "Query Usage" section as the Airflow guide above (identical YAML sample and configuration notes). Its ending (`@@ -451,3 +532,7 @@`) gains the same Lineage pointer:

```diff
 ## dbt Integration

 You can learn more about how to ingest dbt models' definitions and their lineage [here](/connectors/ingestion/workflows/dbt).
+
+## Lineage
+
+You can learn more about how to ingest lineage [here](/connectors/ingestion/workflows/lineage).
```
Athena connector docs — landing page (slug: /connectors/database/athena):

```diff
@@ -9,7 +9,7 @@ slug: /connectors/database/athena

 | Stage | Metadata |Query Usage | Data Profiler | Data Quality | Lineage | DBT | Supported Versions |
 |:------:|:------:|:-----------:|:-------------:|:------------:|:-------:|:---:|:------------------:|
-| PROD | ✅ | ❌ | ✅ | ✅ | 1.0 ( upcoming release ) | ✅ | -- |
+| PROD | ✅ | ✅ (1.0 release onwards) | ✅ | ✅ | ✅ (1.0 release onwards) | ✅ | -- |

 </Table>

@@ -17,7 +17,7 @@ slug: /connectors/database/athena

 | Lineage | Table-level | Column-level |
 |:------:|:-----------:|:-------------:|
-| 1.0 ( upcoming release ) | TBD | TBD |
+| ✅ (1.0 release onwards) | ✅ | ✅ |

 </Table>

@@ -26,8 +26,10 @@ In this section, we provide guides and references to use the Athena connector.
 Configure and schedule Athena metadata and profiler workflows from the OpenMetadata UI:
 - [Requirements](#requirements)
 - [Metadata Ingestion](#metadata-ingestion)
+- [Query Usage](#query-usage)
 - [Data Profiler](#data-profiler)
-- [dbt Integration](#dbt-integration)
+- [Data Quality](#data-quality)
+- [Lineage](#lineage)

 If you don't want to use the OpenMetadata Ingestion container to configure the workflows via the UI, then you can check
 the following docs to connect using Airflow SDK or with the CLI.

@@ -238,6 +240,15 @@ caption="Edit and Deploy the Ingestion Pipeline"

 From the Connection tab, you can also Edit the Service if needed.

+## Query Usage
+
+<Tile
+icon="manage_accounts"
+title="Usage Workflow"
+text="Learn more about how to configure the Usage Workflow to ingest Query information from the UI."
+link="/connectors/ingestion/workflows/usage"
+/>
+
 ## Data Profiler

 <Tile

@@ -264,3 +275,12 @@ title="dbt Integration"
 text="Learn more about how to ingest dbt models' definitions and their lineage."
 link="/connectors/ingestion/workflows/dbt"
 />
+
+## Lineage
+
+<Tile
+icon="air"
+title="Lineage Workflow"
+text="Learn more about how to configure the Lineage from the UI."
+link="/connectors/ingestion/workflows/lineage"
+/>
```
Lineage workflow docs — Athena is now listed among the sources with internal query-log access:

```diff
@@ -79,6 +79,7 @@ From 0.12 onwards, there is a separated Lineage Workflow that will take care of
 The main difference here is between those sources that provide internal access to query logs and those that do not. For
 services such as:

+- [Athena](/connectors/database/athena) (supported with 1.0 release onwards)
 - [BigQuery](/connectors/database/bigquery)
 - [Snowflake](/connectors/database/snowflake)
 - [MSSQL](/connectors/database/mssql)
```
Usage workflow docs — the same addition:

```diff
@@ -7,6 +7,7 @@ slug: /connectors/ingestion/workflows/usage
 Learn how to configure the Usage workflow from the UI to ingest Query history data from your data sources.

 This workflow is available ONLY for the following connectors:
+- [Athena](/connectors/database/athena) (supported with 1.0 release onwards)
 - [BigQuery](/connectors/database/bigquery)
 - [Snowflake](/connectors/database/snowflake)
 - [MSSQL](/connectors/database/mssql)
```
Connection JSON schema — a `supportsQueryComment` capability is added alongside `supportsProfiler`:

```diff
@@ -71,6 +71,10 @@
     "supportsProfiler": {
       "title": "Supports Profiler",
       "$ref": "../connectionBasicType.json#/definitions/supportsProfiler"
+    },
+    "supportsQueryComment": {
+      "title": "Supports Query Comment",
+      "$ref": "../connectionBasicType.json#/definitions/supportsQueryComment"
     }
   },
   "additionalProperties": false,
```