2025-04-03 10:39:47 +05:30
|
|
|
# Copyright 2025 Collate
|
|
|
|
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
2022-01-10 09:36:08 +01:00
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
2025-04-03 10:39:47 +05:30
|
|
|
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
2022-01-10 09:36:08 +01:00
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
OpenMetadata utils tests
|
|
|
|
"""
|
2025-07-25 18:26:44 +05:30
|
|
|
import base64
|
|
|
|
import json
|
2022-01-10 09:36:08 +01:00
|
|
|
from unittest import TestCase
|
|
|
|
|
|
|
|
from metadata.generated.schema.entity.data.mlmodel import MlModel
|
2025-03-05 07:11:17 +01:00
|
|
|
from metadata.generated.schema.entity.data.table import Column, Table
|
2022-01-10 09:36:08 +01:00
|
|
|
from metadata.generated.schema.type import basic
|
2025-03-05 07:11:17 +01:00
|
|
|
from metadata.generated.schema.type.entityReference import EntityReference
|
2023-01-02 13:52:27 +01:00
|
|
|
from metadata.ingestion.connections.headers import render_query_header
|
2025-03-05 07:11:17 +01:00
|
|
|
from metadata.ingestion.ometa.utils import (
|
|
|
|
build_entity_reference,
|
2025-07-25 18:26:44 +05:30
|
|
|
decode_jwt_token,
|
2025-03-05 07:11:17 +01:00
|
|
|
format_name,
|
|
|
|
get_entity_type,
|
|
|
|
model_str,
|
|
|
|
)
|
|
|
|
|
|
|
|
# Minimal Table entity used as the input fixture for the entity-reference test.
# The table and its databaseSchema reference deliberately reuse one UUID literal.
MOCK_TABLE = Table(
    id="c3eb265f-5445-4ad3-ba5e-797d3a3071bb",
    name="customers",
    description="description\nwith new line",
    tableType="Regular",
    columns=[
        Column(name=column_name, dataType=data_type)
        for column_name, data_type in (
            ("customer_id", "INT"),
            ("first_name", "STRING"),
            ("last_name", "STRING"),
        )
    ],
    databaseSchema=EntityReference(
        type="databaseSchema",
        id="c3eb265f-5445-4ad3-ba5e-797d3a3071bb",
    ),
)
|
2022-01-10 09:36:08 +01:00
|
|
|
|
|
|
|
|
|
|
|
class OMetaUtilsTest(TestCase):
    """
    Tests for the helpers imported from `metadata.ingestion.ometa.utils`
    and for `render_query_header`.
    """

    @staticmethod
    def _encode_segment(content: str, *, strip_padding: bool = True) -> str:
        """
        URL-safe Base64 encode `content` so it can be used as a JWT segment.

        Args:
            content: text to encode (UTF-8).
            strip_padding: drop the trailing "=" characters, which is how JWT
                segments are normally serialised. Pass False to keep them.

        Returns:
            The encoded segment as a str.
        """
        encoded = base64.urlsafe_b64encode(content.encode("utf-8")).decode("utf-8")
        return encoded.rstrip("=") if strip_padding else encoded

    def test_format_name(self):
        """
        Check we are properly formatting names
        """
        self.assertEqual(format_name("random"), "random")
        self.assertEqual(format_name("ran dom"), "ran_dom")
        self.assertEqual(format_name("ran_(dom"), "ran__dom")

    def test_get_entity_type(self):
        """
        Check that we return a string or the class name
        """
        self.assertEqual(get_entity_type("hello"), "hello")
        self.assertEqual(get_entity_type(MlModel), "mlmodel")

    def test_model_str(self):
        """
        Plain strings and the basic wrapper types are all rendered as str
        """
        self.assertEqual(model_str("random"), "random")
        self.assertEqual(
            model_str(basic.Uuid("9fc58e81-7412-4023-a298-59f2494aab9d")),
            "9fc58e81-7412-4023-a298-59f2494aab9d",
        )
        self.assertEqual(model_str(basic.EntityName("EntityName")), "EntityName")
        self.assertEqual(model_str(basic.FullyQualifiedEntityName("FQDN")), "FQDN")

    def test_render_query_headers_builds_the_right_string(self) -> None:
        """Check the rendered header is the expected SQL comment"""
        # assertEqual instead of a bare assert: consistent with the rest of the
        # class and not stripped when running with -O.
        self.assertEqual(
            render_query_header("0.0.1"),
            '/* {"app": "OpenMetadata", "version": "0.0.1"} */',
        )

    def test_build_entity_reference(self) -> None:
        """Check we're building the right class"""
        res = build_entity_reference(MOCK_TABLE)
        self.assertEqual(res.type, "table")
        self.assertEqual(res.id, MOCK_TABLE.id)

    def test_decode_jwt_token_valid(self):
        """Test decoding a valid JWT token"""
        # Create a mock JWT payload
        payload = {
            "sub": "testuser",
            "email": "testuser@example.com",
            "name": "Test User",
            "iat": 1640995200,
            "exp": 1641081600,
        }

        # Encode the payload
        payload_encoded = self._encode_segment(json.dumps(payload))

        # Create a mock JWT token (header.payload.signature)
        jwt_token = f"eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.{payload_encoded}.signature"

        result = decode_jwt_token(jwt_token)

        self.assertIsNotNone(result)
        self.assertEqual(result["sub"], "testuser")
        self.assertEqual(result["email"], "testuser@example.com")
        self.assertEqual(result["name"], "Test User")

    def test_decode_jwt_token_with_padding(self):
        """Test decoding a JWT token that needs padding"""
        # The serialised payload must NOT be a multiple of 3 bytes long,
        # otherwise Base64 emits no "=" and stripping it proves nothing.
        # This payload serialises to 52 bytes, so two "=" are stripped.
        payload = {"sub": "admin", "email": "admin@open-metadata.org"}

        # Encode without padding
        payload_encoded = self._encode_segment(json.dumps(payload))

        # Guard the precondition so a future payload edit cannot silently turn
        # this back into a test that never exercises the padding path.
        self.assertNotEqual(len(payload_encoded) % 4, 0)

        jwt_token = f"header.{payload_encoded}.signature"

        result = decode_jwt_token(jwt_token)

        self.assertIsNotNone(result)
        self.assertEqual(result["sub"], "admin")
        self.assertEqual(result["email"], "admin@open-metadata.org")

    def test_decode_jwt_token_invalid_format(self):
        """Test decoding an invalid JWT token format"""
        malformed_tokens = (
            "header.payload",  # Missing signature
            "header.payload.signature.extra",  # Too many parts
        )
        # subTest reports each malformed token independently instead of
        # stopping at the first failure.
        for invalid_token in malformed_tokens:
            with self.subTest(token=invalid_token):
                self.assertIsNone(decode_jwt_token(invalid_token))

    def test_decode_jwt_token_invalid_base64(self):
        """Test decoding a JWT token with invalid base64 in payload"""
        # NOTE(review): every character of "invalid-base64" belongs to the
        # URL-safe Base64 alphabet, so depending on how decode_jwt_token
        # decodes the segment the failure may come from a later step rather
        # than from Base64 decoding itself - confirm against the implementation.
        invalid_token = "header.invalid-base64.signature"
        result = decode_jwt_token(invalid_token)
        self.assertIsNone(result)

    def test_decode_jwt_token_invalid_json(self):
        """Test decoding a JWT token with invalid JSON in payload"""
        # Create invalid JSON payload (padding is kept on purpose)
        invalid_json = "invalid json content"
        payload_encoded = self._encode_segment(invalid_json, strip_padding=False)

        jwt_token = f"header.{payload_encoded}.signature"

        result = decode_jwt_token(jwt_token)
        self.assertIsNone(result)

    def test_decode_jwt_token_empty_payload(self):
        """Test decoding a JWT token with empty payload"""
        # Create empty payload (padding is kept on purpose)
        payload_encoded = self._encode_segment(json.dumps({}), strip_padding=False)

        jwt_token = f"header.{payload_encoded}.signature"

        result = decode_jwt_token(jwt_token)
        self.assertIsNotNone(result)
        self.assertEqual(result, {})

    def test_decode_jwt_token_none_input(self):
        """Test decoding with None input"""
        result = decode_jwt_token(None)
        self.assertIsNone(result)

    def test_decode_jwt_token_empty_string(self):
        """Test decoding with empty string input"""
        result = decode_jwt_token("")
        self.assertIsNone(result)

    def test_decode_jwt_token_real_world_example(self):
        """Test with a realistic JWT token structure"""
        # Simulate a real OpenMetadata JWT token payload
        payload = {
            "sub": "ingestion-bot",
            "iss": "open-metadata.org",
            "iat": 1663938462,
            "email": "ingestion-bot@open-metadata.org",
            "isBot": False,
        }

        payload_encoded = self._encode_segment(json.dumps(payload))

        jwt_token = f"eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.{payload_encoded}.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg"

        result = decode_jwt_token(jwt_token)

        self.assertIsNotNone(result)
        self.assertEqual(result["sub"], "ingestion-bot")
        self.assertEqual(result["iss"], "open-metadata.org")
        self.assertEqual(result["email"], "ingestion-bot@open-metadata.org")
        # Identity check: the JSON boolean must round-trip as False, not 0.
        self.assertIs(result["isBot"], False)
|