datahub/metadata-ingestion/tests/unit/cli/test_graphql_cli.py
import json
import os
import tempfile
from pathlib import Path
from typing import Any
from unittest.mock import Mock, patch
import click
import pytest
from click.testing import CliRunner
from datahub.cli.graphql_cli import (
_collect_nested_types,
_convert_describe_to_json,
_convert_operation_to_json,
_convert_operations_list_to_json,
_convert_type_details_to_json,
_convert_type_to_json,
_dict_to_graphql_input,
_extract_base_type_name,
_fetch_type_recursive,
_find_operation_by_name,
_find_type_by_name,
_format_graphql_type,
_format_operation_details,
_format_operation_list,
_format_recursive_types,
_format_single_type_fields,
_generate_operation_query,
_is_file_path,
_load_content_or_file,
_parse_graphql_operations_from_files,
_parse_operations_from_content,
_parse_variables,
graphql,
)
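# Summary of the helper behavior these tests rely on (a sketch inferred from the
# assertions below, not authoritative documentation of graphql_cli itself):
#   _is_file_path(value)          -> True only when `value` names an existing file;
#                                    GraphQL snippets and missing paths yield False.
#   _load_content_or_file(value)  -> the file contents when `value` is an existing
#                                    path, otherwise `value` returned unchanged.
#   _parse_variables(value)       -> a dict parsed from inline JSON or a JSON file,
#                                    None for empty/None input, and a ClickException
#                                    for malformed JSON.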
class TestHelperFunctions:
"""Test helper functions in graphql_cli module."""
def test_is_file_path_with_existing_file(self):
"""Test that _is_file_path returns True for existing files."""
with tempfile.NamedTemporaryFile(suffix=".graphql", delete=False) as tmp:
tmp.write(b"query { me { username } }")
tmp.flush()
assert _is_file_path(tmp.name)
assert _is_file_path("./test.graphql") is False # doesn't exist
# Clean up
Path(tmp.name).unlink()
def test_is_file_path_with_non_existing_file(self):
"""Test that _is_file_path returns False for non-existing files."""
assert _is_file_path("./non-existent.graphql") is False
assert _is_file_path("/path/to/nowhere.json") is False
assert _is_file_path("query { me }") is False
def test_is_file_path_with_short_strings(self):
"""Test that _is_file_path handles short strings correctly."""
assert _is_file_path("") is False
assert _is_file_path("a") is False
assert _is_file_path("ab") is False
def test_is_file_path_with_relative_paths(self):
"""Test that _is_file_path handles relative paths correctly."""
# Create a temporary directory and file for testing
with tempfile.TemporaryDirectory() as temp_dir:
# Create a test file in the temp directory
test_file = Path(temp_dir) / "test.graphql"
test_file.write_text("query { me { username } }")
# Change to the temp directory to test relative paths
original_cwd = os.getcwd()
try:
os.chdir(temp_dir)
# Test simple filename (exists in current directory)
assert _is_file_path("test.graphql") is True
assert _is_file_path("nonexistent.graphql") is False
# Create a subdirectory for testing relative paths
sub_dir = Path(temp_dir) / "subdir"
sub_dir.mkdir()
sub_file = sub_dir / "sub.graphql"
sub_file.write_text("query { search }")
# Test relative path with ./
assert _is_file_path("./test.graphql") is True
assert _is_file_path("./subdir/sub.graphql") is True
assert _is_file_path("./nonexistent.graphql") is False
# Change to subdirectory to test ../
os.chdir(sub_dir)
assert _is_file_path("../test.graphql") is True
assert _is_file_path("../nonexistent.graphql") is False
finally:
os.chdir(original_cwd)
def test_is_file_path_with_absolute_paths(self):
"""Test that _is_file_path handles absolute paths correctly."""
with tempfile.NamedTemporaryFile(suffix=".graphql", delete=False) as tmp:
tmp.write(b"query { me { username } }")
tmp.flush()
# Test absolute path
assert _is_file_path(tmp.name) is True
# Clean up
Path(tmp.name).unlink()
# Test non-existent absolute path
assert _is_file_path(tmp.name) is False
def test_is_file_path_with_json_files(self):
"""Test that _is_file_path works with JSON files."""
with tempfile.TemporaryDirectory() as temp_dir:
test_file = Path(temp_dir) / "variables.json"
test_file.write_text('{"key": "value"}')
original_cwd = os.getcwd()
try:
os.chdir(temp_dir)
assert _is_file_path("variables.json") is True
assert _is_file_path("./variables.json") is True
finally:
os.chdir(original_cwd)
def test_is_file_path_with_graphql_content(self):
"""Test that _is_file_path correctly identifies GraphQL content vs file paths."""
# These should be identified as GraphQL content, not file paths
graphql_queries = [
"query { me { username } }",
"mutation { deleteEntity(urn: $urn) }",
"query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }",
'{ search(input: { type: TAG, query: "*" }) { total } }',
]
for query in graphql_queries:
assert _is_file_path(query) is False
def test_load_content_or_file_with_file(self):
"""Test loading content from a file."""
content = "query { me { username } }"
with tempfile.NamedTemporaryFile(
mode="w", suffix=".graphql", delete=False
) as tmp:
tmp.write(content)
tmp.flush()
result = _load_content_or_file(tmp.name)
assert result == content
# Clean up
Path(tmp.name).unlink()
def test_load_content_or_file_with_literal(self):
"""Test that literal content is returned as-is."""
content = "query { me { username } }"
result = _load_content_or_file(content)
assert result == content
def test_load_content_or_file_with_relative_paths(self):
"""Test loading content from files using relative paths."""
content1 = "query { me { username } }"
content2 = "query { search(input: { type: TAG }) { total } }"
with tempfile.TemporaryDirectory() as temp_dir:
# Create test files
test_file = Path(temp_dir) / "test.graphql"
test_file.write_text(content1)
sub_dir = Path(temp_dir) / "subdir"
sub_dir.mkdir()
sub_file = sub_dir / "sub.graphql"
sub_file.write_text(content2)
original_cwd = os.getcwd()
try:
os.chdir(temp_dir)
# Test simple filename
result = _load_content_or_file("test.graphql")
assert result == content1
# Test relative path with ./
result = _load_content_or_file("./test.graphql")
assert result == content1
result = _load_content_or_file("./subdir/sub.graphql")
assert result == content2
# Change to subdirectory to test ../
os.chdir(sub_dir)
result = _load_content_or_file("../test.graphql")
assert result == content1
finally:
os.chdir(original_cwd)
def test_load_content_or_file_with_absolute_paths(self):
"""Test loading content from files using absolute paths."""
content = "query { me { username } }"
with tempfile.NamedTemporaryFile(
mode="w", suffix=".graphql", delete=False
) as tmp:
tmp.write(content)
tmp.flush()
# Test absolute path
result = _load_content_or_file(tmp.name)
assert result == content
# Clean up
Path(tmp.name).unlink()
def test_load_content_or_file_error_handling(self):
"""Test error handling when file path looks like a file but doesn't exist."""
with tempfile.TemporaryDirectory() as temp_dir:
original_cwd = os.getcwd()
try:
os.chdir(temp_dir)
# Files that don't exist should be treated as literal content, not files
# This is the expected behavior based on how _is_file_path works
result = _load_content_or_file("nonexistent.graphql")
assert result == "nonexistent.graphql"
result = _load_content_or_file("../nonexistent.graphql")
assert result == "../nonexistent.graphql"
finally:
os.chdir(original_cwd)
def test_parse_variables_with_valid_json(self):
"""Test parsing valid JSON variables."""
variables_str = '{"key": "value", "number": 42}'
result = _parse_variables(variables_str)
assert result == {"key": "value", "number": 42}
def test_parse_variables_with_none(self):
"""Test parsing None variables."""
assert _parse_variables(None) is None
assert _parse_variables("") is None
def test_parse_variables_with_invalid_json(self):
"""Test parsing invalid JSON raises ClickException."""
from click import ClickException
with pytest.raises(ClickException, match="Invalid JSON in variables"):
_parse_variables('{"invalid": json}')
def test_parse_variables_from_file(self):
"""Test parsing variables from a JSON file."""
variables = {"key": "value", "number": 42}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
json.dump(variables, tmp)
tmp.flush()
result = _parse_variables(tmp.name)
assert result == variables
# Clean up
Path(tmp.name).unlink()
def test_format_graphql_type_simple(self):
"""Test formatting simple GraphQL types."""
type_info = {"kind": "SCALAR", "name": "String"}
assert _format_graphql_type(type_info) == "String"
def test_format_graphql_type_non_null(self):
"""Test formatting non-null GraphQL types."""
type_info = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}}
assert _format_graphql_type(type_info) == "String!"
def test_format_graphql_type_list(self):
"""Test formatting list GraphQL types."""
type_info = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}}
assert _format_graphql_type(type_info) == "[String]"
def test_format_graphql_type_complex(self):
"""Test formatting complex GraphQL types."""
type_info = {
"kind": "NON_NULL",
"ofType": {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}},
}
assert _format_graphql_type(type_info) == "[String]!"
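    # Hedged worked example (not asserted in this suite): composing wrappers from the
    # inside out, a non-null list of non-null strings, i.e.
    #   {"kind": "NON_NULL", "ofType": {"kind": "LIST",
    #       "ofType": {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}}}}
    # would presumably be rendered as "[String!]!".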
def test_format_operation_list_empty(self):
"""Test formatting empty operation list."""
result = _format_operation_list([], "Query")
assert result == "No query operations found."
def test_format_operation_list_with_operations(self):
"""Test formatting operation list with operations."""
operations = [
{"name": "me", "description": "Get current user"},
{"name": "search", "description": "Search entities"},
]
result = _format_operation_list(operations, "Query")
expected = "Query:\n - me: Get current user\n - search: Search entities"
assert result == expected
def test_format_operation_list_without_descriptions(self):
"""Test formatting operation list without descriptions."""
operations = [{"name": "me"}, {"name": "search", "description": ""}]
result = _format_operation_list(operations, "Query")
expected = "Query:\n - me\n - search"
assert result == expected
def test_format_operation_details(self):
"""Test formatting operation details."""
operation = {
"name": "searchAcrossEntities",
"description": "Search across all entity types",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
}
],
}
result = _format_operation_details(operation, "Query")
expected = (
"Operation: searchAcrossEntities\n"
"Type: Query\n"
"Description: Search across all entity types\n"
"Arguments:\n"
" - input: SearchInput!"
)
assert result == expected
def test_format_operation_details_no_args(self):
"""Test formatting operation details without arguments."""
operation = {"name": "me", "description": "Get current user", "args": []}
result = _format_operation_details(operation, "Query")
expected = (
"Operation: me\nType: Query\nDescription: Get current user\nArguments: None"
)
assert result == expected
def test_find_operation_by_name_in_queries(self):
"""Test finding operation in queries."""
schema = {
"queryType": {
"fields": [
{"name": "me", "description": "Get current user"},
{"name": "search", "description": "Search entities"},
]
},
"mutationType": {
"fields": [{"name": "deleteEntity", "description": "Delete entity"}]
},
}
result = _find_operation_by_name(schema, "me")
assert result is not None
operation, operation_type = result
assert operation["name"] == "me"
assert operation_type == "Query"
def test_find_operation_by_name_in_mutations(self):
"""Test finding operation in mutations."""
schema = {
"queryType": {
"fields": [{"name": "me", "description": "Get current user"}]
},
"mutationType": {
"fields": [{"name": "deleteEntity", "description": "Delete entity"}]
},
}
result = _find_operation_by_name(schema, "deleteEntity")
assert result is not None
operation, operation_type = result
assert operation["name"] == "deleteEntity"
assert operation_type == "Mutation"
def test_find_operation_by_name_not_found(self):
"""Test finding non-existent operation."""
schema = {
"queryType": {"fields": [{"name": "me", "description": "Get current user"}]}
}
result = _find_operation_by_name(schema, "nonExistent")
assert result is None
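# For orientation, the runner.invoke calls below correspond roughly to command lines
# such as the following (illustrative only; the exact top-level command name under
# which `graphql` is registered is an assumption):
#   datahub graphql --query 'query { me { username } }'
#   datahub graphql --query ./query.graphql --variables ./variables.json
#   datahub graphql --list-operations
#   datahub graphql --describe searchAcrossEntities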
class TestGraphQLCommand:
"""Test the main GraphQL CLI command."""
def setup_method(self):
"""Set up test environment."""
self.runner = CliRunner()
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_raw_query(self, mock_get_graph):
"""Test executing raw GraphQL query."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {"me": {"username": "testuser"}}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(graphql, ["--query", "query { me { username } }"])
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_once_with(
query="query { me { username } }", variables=None
)
assert '"me"' in result.output
assert '"username": "testuser"' in result.output
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_query_with_variables(self, mock_get_graph):
"""Test executing GraphQL query with variables."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"corpUser": {"info": {"email": "test@example.com"}}
}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(
graphql,
[
"--query",
"query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }",
"--variables",
'{"urn": "urn:li:corpuser:test"}',
],
)
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_once_with(
query="query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }",
variables={"urn": "urn:li:corpuser:test"},
)
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_list_operations(self, mock_get_graph):
"""Test listing GraphQL operations."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"__schema": {
"queryType": {
"fields": [
{"name": "me", "description": "Get current user"},
{"name": "search", "description": "Search entities"},
]
},
"mutationType": {
"fields": [{"name": "deleteEntity", "description": "Delete entity"}]
},
}
}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(graphql, ["--list-operations"])
assert result.exit_code == 0
assert "Queries:" in result.output
assert "me: Get current user" in result.output
assert "Mutations:" in result.output
assert "deleteEntity: Delete entity" in result.output
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_describe_operation(self, mock_get_graph):
"""Test describing a GraphQL operation."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"__schema": {
"queryType": {
"fields": [
{
"name": "searchAcrossEntities",
"description": "Search across all entity types",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {
"kind": "INPUT_OBJECT",
"name": "SearchInput",
},
},
}
],
}
]
}
}
}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(graphql, ["--describe", "searchAcrossEntities"])
assert result.exit_code == 0
assert "Operation: searchAcrossEntities" in result.output
assert "Type: Query" in result.output
assert "Description: Search across all entity types" in result.output
assert "input: SearchInput!" in result.output
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_no_arguments(self, mock_get_graph):
"""Test GraphQL command with no arguments."""
# Mock is needed even for argument validation to avoid config errors
mock_client = Mock()
mock_get_graph.return_value = mock_client
result = self.runner.invoke(graphql, [])
assert result.exit_code != 0
assert (
"Must specify either --query, --operation, or a discovery option"
in result.output
)
@patch("datahub.cli.graphql_cli._get_schema_via_introspection")
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_operation_execution_with_mock_error(
self, mock_get_graph, mock_schema
):
"""Test that operation-based execution works but fails with mock serialization error."""
mock_client = Mock()
# Mock schema introspection to return a valid schema
mock_schema.return_value = {
"queryType": {
"fields": [
{
"name": "searchAcrossEntities",
"args": [{"name": "input", "type": {"kind": "NON_NULL"}}],
}
]
}
}
        # Mock execute_graphql to raise a JSON serialization error, as would happen in a real scenario
mock_client.execute_graphql.side_effect = TypeError(
"Object of type Mock is not JSON serializable"
)
mock_get_graph.return_value = mock_client
result = self.runner.invoke(
graphql,
[
"--operation",
"searchAcrossEntities",
"--variables",
'{"input": {"query": "test"}}',
],
)
assert result.exit_code != 0
assert "Failed to execute operation 'searchAcrossEntities'" in result.output
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_execution_error(self, mock_get_graph):
"""Test handling GraphQL execution errors."""
mock_client = Mock()
mock_client.execute_graphql.side_effect = Exception("GraphQL error")
mock_get_graph.return_value = mock_client
result = self.runner.invoke(graphql, ["--query", "query { invalidField }"])
assert result.exit_code != 0
assert "Failed to execute GraphQL query: GraphQL error" in result.output
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_no_pretty_output(self, mock_get_graph):
"""Test GraphQL output without pretty printing."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {"me": {"username": "testuser"}}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(
graphql, ["--query", "query { me { username } }", "--no-pretty"]
)
assert result.exit_code == 0
# Output should be compact JSON without indentation
assert '{"me": {"username": "testuser"}}' in result.output
def test_graphql_query_from_file(self):
"""Test loading GraphQL query from file."""
query_content = "query { me { username } }"
with tempfile.NamedTemporaryFile(
mode="w", suffix=".graphql", delete=False
) as tmp:
tmp.write(query_content)
tmp.flush()
with patch("datahub.cli.graphql_cli.get_default_graph") as mock_get_graph:
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"me": {"username": "testuser"}
}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(graphql, ["--query", tmp.name])
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_once_with(
query=query_content, variables=None
)
# Clean up
Path(tmp.name).unlink()
def test_graphql_variables_from_file(self):
"""Test loading variables from JSON file."""
variables = {"urn": "urn:li:corpuser:test"}
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
json.dump(variables, tmp)
tmp.flush()
with patch("datahub.cli.graphql_cli.get_default_graph") as mock_get_graph:
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"corpUser": {"info": {"email": "test@example.com"}}
}
mock_get_graph.return_value = mock_client
result = self.runner.invoke(
graphql,
[
"--query",
"query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }",
"--variables",
tmp.name,
],
)
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_once_with(
query="query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }",
variables=variables,
)
# Clean up
Path(tmp.name).unlink()
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_query_from_relative_path(self, mock_get_graph):
"""Test loading GraphQL query from relative path."""
query_content = "query { me { username } }"
mock_client = Mock()
mock_client.execute_graphql.return_value = {"me": {"username": "testuser"}}
mock_get_graph.return_value = mock_client
with tempfile.TemporaryDirectory() as temp_dir:
# Create test file
test_file = Path(temp_dir) / "test_query.graphql"
test_file.write_text(query_content)
# Create subdirectory structure for testing different relative paths
sub_dir = Path(temp_dir) / "subdir"
sub_dir.mkdir()
original_cwd = os.getcwd()
try:
# Test from parent directory with ./
os.chdir(temp_dir)
result = self.runner.invoke(
graphql, ["--query", "./test_query.graphql"]
)
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_with(
query=query_content, variables=None
)
# Reset mock for next test
mock_client.reset_mock()
# Test from subdirectory with ../
os.chdir(sub_dir)
result = self.runner.invoke(
graphql, ["--query", "../test_query.graphql"]
)
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_with(
query=query_content, variables=None
)
finally:
os.chdir(original_cwd)
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_variables_from_relative_path(self, mock_get_graph):
"""Test loading variables from relative JSON file path."""
variables = {"urn": "urn:li:corpuser:test"}
query = (
"query GetUser($urn: String!) { corpUser(urn: $urn) { info { email } } }"
)
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"corpUser": {"info": {"email": "test@example.com"}}
}
mock_get_graph.return_value = mock_client
with tempfile.TemporaryDirectory() as temp_dir:
# Create test variables file
vars_file = Path(temp_dir) / "variables.json"
vars_file.write_text(json.dumps(variables))
original_cwd = os.getcwd()
try:
os.chdir(temp_dir)
result = self.runner.invoke(
graphql,
[
"--query",
query,
"--variables",
"./variables.json",
],
)
assert result.exit_code == 0
mock_client.execute_graphql.assert_called_with(
query=query, variables=variables
)
finally:
os.chdir(original_cwd)
@patch("datahub.cli.graphql_cli.get_default_graph")
def test_graphql_query_from_nonexistent_relative_path(self, mock_get_graph):
"""Test error handling with non-existent relative path."""
# Mock client to handle GraphQL execution
mock_client = Mock()
mock_client.execute_graphql.side_effect = Exception("Query execution failed")
mock_get_graph.return_value = mock_client
with tempfile.TemporaryDirectory() as temp_dir:
original_cwd = os.getcwd()
try:
os.chdir(temp_dir)
result = self.runner.invoke(
graphql, ["--query", "./nonexistent.graphql"]
)
                # The file doesn't exist, so the argument is treated as a literal
                # query string, which then fails at execution time
assert result.exit_code != 0
assert "Failed to execute GraphQL query" in result.output
finally:
os.chdir(original_cwd)
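# Schema discovery behavior assumed by the tests below: a None schema path falls back
# to a minimal built-in operation set (containing at least "me"); an explicitly
# provided path that does not exist raises click.ClickException with
# "Custom schema path does not exist"; and a failure to locate the bundled schema
# files surfaces as "Schema loading failed".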
class TestSchemaFileHandling:
"""Test schema file parsing and fallback functionality."""
def test_parse_graphql_operations_from_files_error_fallback(self):
"""Test that when schema path lookup fails, function falls back gracefully."""
# Test the error handling by directly calling with None schema path
# which triggers the fallback path lookup that will fail in test environment
result = _parse_graphql_operations_from_files(None)
# Should return the minimal fallback operations structure
assert "queryType" in result
assert "mutationType" in result
# Should contain known fallback operations
query_fields = result["queryType"]["fields"]
query_names = [op["name"] for op in query_fields]
assert "me" in query_names
def test_parse_graphql_operations_from_files_with_custom_path(self):
"""Test parsing operations from custom schema path."""
with tempfile.TemporaryDirectory() as temp_dir:
schema_path = Path(temp_dir)
# Create a mock GraphQL schema file
schema_file = schema_path / "test.graphql"
schema_content = """
type Query {
"Get current user"
me: User
"Search entities"
search(query: String!): SearchResults
}
type Mutation {
"Create a new user"
createUser(input: CreateUserInput!): User
}
"""
schema_file.write_text(schema_content)
result = _parse_graphql_operations_from_files(str(schema_path))
# Should parse queries
assert "queryType" in result
assert result["queryType"] is not None
query_fields = result["queryType"]["fields"]
assert len(query_fields) >= 2
# Check specific operations
me_op = next(op for op in query_fields if op["name"] == "me")
assert me_op["description"] == "Get current user"
search_op = next(op for op in query_fields if op["name"] == "search")
assert search_op["description"] == "Search entities"
# Should parse mutations
assert "mutationType" in result
assert result["mutationType"] is not None
mutation_fields = result["mutationType"]["fields"]
assert len(mutation_fields) >= 1
create_user_op = next(
op for op in mutation_fields if op["name"] == "createUser"
)
assert create_user_op["description"] == "Create a new user"
def test_parse_graphql_operations_from_files_nonexistent_custom_path(self):
"""Test parsing operations with non-existent custom schema path."""
# With our improved error handling, this should raise ClickException
nonexistent_path = "/this/path/definitely/does/not/exist/on/any/system"
with pytest.raises(click.ClickException) as exc_info:
_parse_graphql_operations_from_files(nonexistent_path)
assert "Custom schema path does not exist" in str(exc_info.value)
def test_parse_graphql_operations_from_files_fallback_on_error(self):
"""Test that parsing raises clear error when schema files can't be found."""
with patch("datahub.cli.graphql_cli._get_schema_files_path") as mock_get_path:
mock_get_path.side_effect = Exception("Schema files not found")
with pytest.raises(click.ClickException) as exc_info:
_parse_graphql_operations_from_files()
assert "Schema loading failed" in str(exc_info.value)
def test_parse_operations_from_content(self):
"""Test parsing operations from GraphQL content string."""
content = """
\"\"\"Get current authenticated user\"\"\"
me: AuthenticatedUser
"Search across all entity types"
searchAcrossEntities(input: SearchInput!): SearchResults
# This should be skipped as it's not a valid field
type SomeType {
field: String
}
"Browse entities hierarchically"
browse(path: BrowsePath): BrowseResults
"""
operations = _parse_operations_from_content(content, "Query")
assert len(operations) >= 3
# Check specific operations were parsed
op_names = [op["name"] for op in operations]
assert "me" in op_names
assert "searchAcrossEntities" in op_names
assert "browse" in op_names
# Check descriptions were extracted
me_op = next(op for op in operations if op["name"] == "me")
assert "authenticated user" in me_op["description"].lower()
search_op = next(
op for op in operations if op["name"] == "searchAcrossEntities"
)
assert "search across all entity types" in search_op["description"].lower()
def test_parse_operations_from_content_with_keywords(self):
"""Test that GraphQL keywords are properly filtered out."""
content = """
query: String
mutation: String
subscription: String
type: String
input: String
enum: String
validField: String
"""
operations = _parse_operations_from_content(content, "Query")
# Should only contain validField, keywords should be filtered
assert len(operations) == 1
assert operations[0]["name"] == "validField"
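# The content parser exercised above appears to scan schema text line by line,
# attaching the immediately preceding '"..."' or '"""..."""' description to each
# field-style "name(args): Type" line, and skipping GraphQL keywords such as
# query/mutation/type/input/enum. This is an inference from the assertions, not a
# description of the actual implementation.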
class TestOperationGenerationAndQueryBuilding:
"""Test operation generation and query building functionality."""
def test_dict_to_graphql_input_simple(self):
"""Test converting simple dict to GraphQL input syntax."""
input_dict = {"key": "value", "number": 42, "flag": True}
result = _dict_to_graphql_input(input_dict)
assert 'key: "value"' in result
assert "number: 42" in result
assert "flag: true" in result
assert result.startswith("{") and result.endswith("}")
def test_dict_to_graphql_input_nested(self):
"""Test converting nested dict to GraphQL input syntax."""
input_dict = {
"user": {"name": "test", "age": 30},
"tags": ["tag1", "tag2"],
"metadata": {"active": True},
}
result = _dict_to_graphql_input(input_dict)
assert 'user: {name: "test", age: 30}' in result
assert 'tags: ["tag1", "tag2"]' in result
assert "metadata: {active: true}" in result
def test_dict_to_graphql_input_complex_lists(self):
"""Test converting dict with complex list items to GraphQL input syntax."""
input_dict = {
"users": [
{"name": "user1", "active": True},
{"name": "user2", "active": False},
],
"values": [1, 2, 3],
"strings": ["a", "b", "c"],
}
result = _dict_to_graphql_input(input_dict)
assert (
'users: [{name: "user1", active: true}, {name: "user2", active: false}]'
in result
)
assert "values: [1, 2, 3]" in result
assert 'strings: ["a", "b", "c"]' in result
def test_dict_to_graphql_input_non_dict(self):
"""Test handling non-dict input."""
result = _dict_to_graphql_input("not a dict") # type: ignore
assert result == "not a dict"
result = _dict_to_graphql_input(123) # type: ignore
assert result == "123"
def test_generate_operation_query_simple(self):
"""Test generating query for simple operation without arguments."""
operation_field = {"name": "me", "description": "Get current user", "args": []}
result = _generate_operation_query(operation_field, "Query")
expected = "query { me { corpUser { urn username properties { displayName email firstName lastName title } } } }"
assert result == expected
def test_generate_operation_query_with_required_args(self):
"""Test generating query for operation with required arguments."""
operation_field = {
"name": "searchAcrossEntities",
"description": "Search across all entity types",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
}
],
}
variables = {"input": {"query": "test", "start": 0, "count": 10}}
result = _generate_operation_query(operation_field, "Query", variables)
expected = 'query { searchAcrossEntities(input: {query: "test", start: 0, count: 10}) }'
assert result == expected
def test_generate_operation_query_missing_required_args(self):
"""Test error when required arguments are missing."""
operation_field = {
"name": "searchAcrossEntities",
"description": "Search across all entity types",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
}
],
}
with pytest.raises(
Exception,
match="Operation 'searchAcrossEntities' requires arguments: input",
):
_generate_operation_query(operation_field, "Query", None)
def test_generate_operation_query_with_optional_args(self):
"""Test generating query with optional arguments."""
operation_field = {
"name": "browse",
"description": "Browse entities",
"args": [
{
"name": "path",
"type": {"kind": "SCALAR", "name": "String"}, # Optional
},
{
"name": "filter",
"type": {
"kind": "INPUT_OBJECT",
"name": "BrowseFilter",
}, # Optional
},
],
}
variables = {"path": "datasets"}
result = _generate_operation_query(operation_field, "Query", variables)
expected = 'query { browse(path: "datasets") }'
assert result == expected
def test_generate_operation_query_mutation(self):
"""Test generating mutation query."""
operation_field = {
"name": "addTag",
"description": "Add tag to entity",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {
"kind": "INPUT_OBJECT",
"name": "TagAssociationInput",
},
},
}
],
}
variables = {
"input": {"tagUrn": "urn:li:tag:test", "resourceUrn": "urn:li:dataset:test"}
}
result = _generate_operation_query(operation_field, "Mutation", variables)
expected = 'mutation { addTag(input: {tagUrn: "urn:li:tag:test", resourceUrn: "urn:li:dataset:test"}) }'
assert result == expected
def test_generate_operation_query_list_operations(self):
"""Test generating queries for list operations."""
# Test listUsers operation
operation_field = {
"name": "listUsers",
"description": "List all users",
"args": [],
}
result = _generate_operation_query(operation_field, "Query")
expected = "query { listUsers { total users { urn username properties { displayName email } } } }"
assert result == expected
# Test other list operation
operation_field = {
"name": "listDatasets",
"description": "List datasets",
"args": [],
}
result = _generate_operation_query(operation_field, "Query")
expected = "query { listDatasets { total } }"
assert result == expected
def test_generate_operation_query_entity_operations(self):
"""Test generating queries for specific entity operations."""
entity_operations = [
("corpUser", "query { corpUser { urn } }"),
("dataset", "query { dataset { urn } }"),
("dashboard", "query { dashboard { urn } }"),
("chart", "query { chart { urn } }"),
]
for op_name, expected in entity_operations:
operation_field = {
"name": op_name,
"description": f"Get {op_name}",
"args": [],
}
result = _generate_operation_query(operation_field, "Query")
assert result == expected
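    # The expected strings above imply that _generate_operation_query presumably
    # carries built-in default selection sets for well-known operations (me,
    # listUsers, corpUser, dataset, dashboard, chart), a generic { total } selection
    # for other list* operations, and no selection set otherwise; that mapping is
    # assumed from the assertions, not documented here.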
def test_generate_operation_query_complex_variables(self):
"""Test generating queries with complex variable structures."""
operation_field = {
"name": "complexOperation",
"description": "Complex operation with nested input",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {"kind": "INPUT_OBJECT", "name": "ComplexInput"},
},
}
],
}
complex_variables = {
"input": {
"filters": {
"platform": "snowflake",
"entityTypes": ["DATASET", "TABLE"],
},
"sort": {"field": "name", "direction": "ASC"},
"pagination": {"start": 0, "count": 20},
}
}
result = _generate_operation_query(operation_field, "Query", complex_variables)
# Should contain the complex nested structure
assert "complexOperation(input: {" in result
assert 'platform: "snowflake"' in result
assert 'entityTypes: ["DATASET", "TABLE"]' in result
assert 'direction: "ASC"' in result
def test_generate_operation_query_boolean_handling(self):
"""Test that boolean values are properly formatted."""
operation_field = {
"name": "testOperation",
"description": "Test operation with boolean",
"args": [
{"name": "input", "type": {"kind": "INPUT_OBJECT", "name": "TestInput"}}
],
}
variables = {
"input": {
"active": True,
"deprecated": False,
"count": 0, # Should not be converted to boolean
}
}
result = _generate_operation_query(operation_field, "Query", variables)
assert "active: true" in result
assert "deprecated: false" in result
assert "count: 0" in result
def test_generate_operation_query_string_escaping(self):
"""Test that string values are properly quoted and escaped."""
operation_field = {
"name": "testOperation",
"description": "Test operation with strings",
"args": [
{"name": "input", "type": {"kind": "INPUT_OBJECT", "name": "TestInput"}}
],
}
variables = {
"input": {
"name": "test entity",
"description": 'A test description with "quotes"',
"number": 42,
}
}
result = _generate_operation_query(operation_field, "Query", variables)
assert 'name: "test entity"' in result
assert "number: 42" in result # Numbers should not be quoted
class TestTypeIntrospectionAndRecursiveExploration:
"""Test type introspection and recursive type exploration functionality."""
def test_extract_base_type_name_simple(self):
"""Test extracting base type name from simple type."""
type_info = {"kind": "SCALAR", "name": "String"}
result = _extract_base_type_name(type_info)
assert result == "String"
def test_extract_base_type_name_non_null(self):
"""Test extracting base type name from NON_NULL wrapper."""
type_info = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}}
result = _extract_base_type_name(type_info)
assert result == "String"
def test_extract_base_type_name_list(self):
"""Test extracting base type name from LIST wrapper."""
type_info = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}}
result = _extract_base_type_name(type_info)
assert result == "String"
def test_extract_base_type_name_nested_wrappers(self):
"""Test extracting base type name from nested wrappers."""
type_info = {
"kind": "NON_NULL",
"ofType": {
"kind": "LIST",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
}
result = _extract_base_type_name(type_info)
assert result == "SearchInput"
def test_extract_base_type_name_empty(self):
"""Test extracting base type name from empty or invalid type."""
assert _extract_base_type_name({}) is None
assert _extract_base_type_name(None) is None # type: ignore
assert _extract_base_type_name({"kind": "NON_NULL"}) is None # Missing ofType
def test_find_type_by_name(self):
"""Test finding a type by name using GraphQL introspection."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {
"__type": {
"name": "SearchInput",
"kind": "INPUT_OBJECT",
"inputFields": [
{
"name": "query",
"description": "Search query string",
"type": {"kind": "SCALAR", "name": "String"},
},
{
"name": "start",
"description": "Start offset",
"type": {"kind": "SCALAR", "name": "Int"},
},
],
}
}
result = _find_type_by_name(mock_client, "SearchInput")
assert result is not None
assert result["name"] == "SearchInput"
assert result["kind"] == "INPUT_OBJECT"
assert len(result["inputFields"]) == 2
# Verify the query was executed correctly
mock_client.execute_graphql.assert_called_once()
call_args = mock_client.execute_graphql.call_args
query_arg = (
call_args[1]["query"]
if len(call_args) > 1 and "query" in call_args[1]
else call_args[0][0]
)
assert "SearchInput" in query_arg
def test_find_type_by_name_not_found(self):
"""Test finding a non-existent type."""
mock_client = Mock()
mock_client.execute_graphql.return_value = {"__type": None}
result = _find_type_by_name(mock_client, "NonExistentType")
assert result is None
def test_find_type_by_name_error(self):
"""Test error handling when introspection fails."""
mock_client = Mock()
mock_client.execute_graphql.side_effect = Exception("GraphQL error")
result = _find_type_by_name(mock_client, "SearchInput")
assert result is None
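    # _find_type_by_name presumably issues a standard __type(name: ...) introspection
    # query, roughly of the form:
    #   query { __type(name: "SearchInput") { name kind inputFields { name description
    #       type { kind name ofType { kind name } } } } }
    # The exact selection set is an assumption; these tests only check that the type
    # name appears in the query text and that the "__type" key is read from the result.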
def test_collect_nested_types(self):
"""Test collecting nested type names from a type definition."""
type_info = {
"inputFields": [
{
"name": "filter",
"type": {"kind": "INPUT_OBJECT", "name": "FilterInput"},
},
{
"name": "tags",
"type": {
"kind": "LIST",
"ofType": {"kind": "INPUT_OBJECT", "name": "TagInput"},
},
},
{
"name": "name",
"type": {
"kind": "SCALAR",
"name": "String",
}, # Should be filtered out
},
{
"name": "count",
"type": {"kind": "SCALAR", "name": "Int"}, # Should be filtered out
},
]
}
result = _collect_nested_types(type_info)
assert len(result) == 2
assert "FilterInput" in result
assert "TagInput" in result
# Scalar types should not be included
assert "String" not in result
assert "Int" not in result
def test_collect_nested_types_with_visited(self):
"""Test collecting nested types with visited set to avoid duplicates."""
type_info = {
"inputFields": [
{
"name": "filter1",
"type": {"kind": "INPUT_OBJECT", "name": "FilterInput"},
},
{
"name": "filter2",
"type": {
"kind": "INPUT_OBJECT",
"name": "FilterInput",
}, # Duplicate
},
]
}
visited: set[str] = set()
result = _collect_nested_types(type_info, visited)
# The function doesn't deduplicate internally - it returns all found types
# Deduplication happens at a higher level in the recursive fetching
assert "FilterInput" in result
assert len(result) == 2 # Two references to the same type
def test_fetch_type_recursive(self):
"""Test recursively fetching a type and its nested types."""
mock_client = Mock()
# Mock responses for different types
def mock_execute_graphql(query, **kwargs):
if "SearchInput" in query:
return {
"__type": {
"name": "SearchInput",
"kind": "INPUT_OBJECT",
"inputFields": [
{
"name": "filter",
"type": {"kind": "INPUT_OBJECT", "name": "FilterInput"},
},
{
"name": "query",
"type": {"kind": "SCALAR", "name": "String"},
},
],
}
}
elif "FilterInput" in query:
return {
"__type": {
"name": "FilterInput",
"kind": "INPUT_OBJECT",
"inputFields": [
{
"name": "platform",
"type": {"kind": "SCALAR", "name": "String"},
}
],
}
}
return {"__type": None}
mock_client.execute_graphql.side_effect = mock_execute_graphql
result = _fetch_type_recursive(mock_client, "SearchInput")
# Should contain both types
assert "SearchInput" in result
assert "FilterInput" in result
# Verify structure
search_input = result["SearchInput"]
assert search_input["name"] == "SearchInput"
assert search_input["kind"] == "INPUT_OBJECT"
filter_input = result["FilterInput"]
assert filter_input["name"] == "FilterInput"
assert filter_input["kind"] == "INPUT_OBJECT"
def test_fetch_type_recursive_circular_reference(self):
"""Test handling of circular type references."""
mock_client = Mock()
# Create a circular reference scenario
def mock_execute_graphql(query, **kwargs):
if "TypeA" in query:
return {
"__type": {
"name": "TypeA",
"kind": "INPUT_OBJECT",
"inputFields": [
{
"name": "typeB",
"type": {"kind": "INPUT_OBJECT", "name": "TypeB"},
}
],
}
}
elif "TypeB" in query:
return {
"__type": {
"name": "TypeB",
"kind": "INPUT_OBJECT",
"inputFields": [
{
"name": "typeA",
"type": {
"kind": "INPUT_OBJECT",
"name": "TypeA",
}, # Circular reference
}
],
}
}
return {"__type": None}
mock_client.execute_graphql.side_effect = mock_execute_graphql
result = _fetch_type_recursive(mock_client, "TypeA")
# Should handle circular reference without infinite loop
assert "TypeA" in result
assert "TypeB" in result
assert len(result) == 2 # No duplicates
def test_fetch_type_recursive_error_handling(self):
"""Test error handling during recursive type fetching."""
mock_client = Mock()
mock_client.execute_graphql.side_effect = Exception("GraphQL error")
result = _fetch_type_recursive(mock_client, "SearchInput")
# Should return empty dict on error
assert result == {}
def test_format_single_type_fields_input_object(self):
"""Test formatting fields for an INPUT_OBJECT type."""
type_info = {
"kind": "INPUT_OBJECT",
"inputFields": [
{
"name": "query",
"description": "Search query string",
"type": {"kind": "SCALAR", "name": "String"},
},
{
"name": "filter",
"type": {"kind": "INPUT_OBJECT", "name": "FilterInput"},
},
],
}
result = _format_single_type_fields(type_info)
assert len(result) == 2
assert " query: String - Search query string" in result
assert " filter: FilterInput" in result
def test_format_single_type_fields_enum(self):
"""Test formatting enum values for an ENUM type."""
type_info = {
"kind": "ENUM",
"enumValues": [
{
"name": "ACTIVE",
"description": "Entity is active",
"isDeprecated": False,
},
{
"name": "DEPRECATED_VALUE",
"description": "Old value",
"isDeprecated": True,
"deprecationReason": "Use ACTIVE instead",
},
],
}
result = _format_single_type_fields(type_info)
assert len(result) == 2
assert " ACTIVE - Entity is active" in result
assert (
" DEPRECATED_VALUE - Old value (DEPRECATED: Use ACTIVE instead)" in result
)
def test_format_single_type_fields_empty(self):
"""Test formatting empty type (no fields or enum values)."""
# Empty INPUT_OBJECT
type_info = {"kind": "INPUT_OBJECT", "inputFields": []}
result = _format_single_type_fields(type_info)
assert result == [" (no fields)"]
# Empty ENUM
type_info = {"kind": "ENUM", "enumValues": []}
result = _format_single_type_fields(type_info)
assert result == [" (no enum values)"]
def test_format_recursive_types(self):
"""Test formatting multiple types in hierarchical display."""
types_map = {
"SearchInput": {
"name": "SearchInput",
"kind": "INPUT_OBJECT",
"inputFields": [
{"name": "query", "type": {"kind": "SCALAR", "name": "String"}}
],
},
"FilterInput": {
"name": "FilterInput",
"kind": "INPUT_OBJECT",
"inputFields": [
{"name": "platform", "type": {"kind": "SCALAR", "name": "String"}}
],
},
}
result = _format_recursive_types(types_map, "SearchInput")
# Should display root type first
lines = result.split("\n")
assert "SearchInput:" in lines[0]
assert " query: String" in result
# Should display nested types
assert "FilterInput:" in result
assert " platform: String" in result
def test_format_recursive_types_root_type_missing(self):
"""Test formatting when root type is not in the types map."""
types_map = {
"FilterInput": {
"name": "FilterInput",
"kind": "INPUT_OBJECT",
"inputFields": [],
}
}
result = _format_recursive_types(types_map, "SearchInput")
# Should still display other types
assert "FilterInput:" in result
# Should not crash when root type is missing
class TestJSONOutputFormatting:
"""Test JSON output formatting for LLM consumption."""
def test_convert_type_to_json_simple(self):
"""Test converting simple GraphQL type to JSON format."""
type_info = {"kind": "SCALAR", "name": "String"}
result = _convert_type_to_json(type_info)
expected = {"kind": "SCALAR", "name": "String"}
assert result == expected
def test_convert_type_to_json_non_null(self):
"""Test converting NON_NULL type to JSON format."""
type_info = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}}
result = _convert_type_to_json(type_info)
expected = {"kind": "NON_NULL", "ofType": {"kind": "SCALAR", "name": "String"}}
assert result == expected
def test_convert_type_to_json_list(self):
"""Test converting LIST type to JSON format."""
type_info = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}}
result = _convert_type_to_json(type_info)
expected = {"kind": "LIST", "ofType": {"kind": "SCALAR", "name": "String"}}
assert result == expected
def test_convert_type_to_json_complex(self):
"""Test converting complex nested type to JSON format."""
type_info = {
"kind": "NON_NULL",
"ofType": {
"kind": "LIST",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
}
result = _convert_type_to_json(type_info)
expected = {
"kind": "NON_NULL",
"ofType": {
"kind": "LIST",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
}
assert result == expected
def test_convert_type_to_json_empty(self):
"""Test converting empty type info."""
result = _convert_type_to_json({})
assert result == {}
result = _convert_type_to_json(None) # type: ignore
assert result == {}
def test_convert_operation_to_json(self):
"""Test converting operation info to JSON format."""
operation = {
"name": "searchAcrossEntities",
"description": "Search across all entity types",
"args": [
{
"name": "input",
"description": "Search input parameters",
"type": {
"kind": "NON_NULL",
"ofType": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
},
},
{
"name": "limit",
"description": "Maximum results to return",
"type": {"kind": "SCALAR", "name": "Int"}, # Optional
},
],
}
result = _convert_operation_to_json(operation, "Query")
assert result["name"] == "searchAcrossEntities"
assert result["type"] == "Query"
assert result["description"] == "Search across all entity types"
assert len(result["arguments"]) == 2
# Check required argument
input_arg = result["arguments"][0]
assert input_arg["name"] == "input"
assert input_arg["description"] == "Search input parameters"
assert input_arg["required"] is True
assert input_arg["type"]["kind"] == "NON_NULL"
# Check optional argument
limit_arg = result["arguments"][1]
assert limit_arg["name"] == "limit"
assert limit_arg["required"] is False
assert limit_arg["type"]["kind"] == "SCALAR"
def test_convert_operation_to_json_no_args(self):
"""Test converting operation with no arguments to JSON format."""
operation = {"name": "me", "description": "Get current user", "args": []}
result = _convert_operation_to_json(operation, "Query")
assert result["name"] == "me"
assert result["type"] == "Query"
assert result["description"] == "Get current user"
assert result["arguments"] == []
def test_convert_type_details_to_json_input_object(self):
"""Test converting INPUT_OBJECT type details to JSON format."""
type_info = {
"name": "SearchInput",
"kind": "INPUT_OBJECT",
"description": "Input for search operations",
"inputFields": [
{
"name": "query",
"description": "Search query string",
"type": {"kind": "SCALAR", "name": "String"},
},
{
"name": "filter",
"description": "Search filters",
"type": {"kind": "INPUT_OBJECT", "name": "FilterInput"},
},
],
}
result = _convert_type_details_to_json(type_info)
assert result["name"] == "SearchInput"
assert result["kind"] == "INPUT_OBJECT"
assert result["description"] == "Input for search operations"
assert len(result["fields"]) == 2
query_field = result["fields"][0]
assert query_field["name"] == "query"
assert query_field["description"] == "Search query string"
assert query_field["type"]["kind"] == "SCALAR"
def test_convert_type_details_to_json_enum(self):
"""Test converting ENUM type details to JSON format."""
type_info = {
"name": "EntityType",
"kind": "ENUM",
"description": "Types of entities in DataHub",
"enumValues": [
{
"name": "DATASET",
"description": "Dataset entity",
"isDeprecated": False,
},
{
"name": "LEGACY_TYPE",
"description": "Old entity type",
"isDeprecated": True,
"deprecationReason": "Use DATASET instead",
},
],
}
result = _convert_type_details_to_json(type_info)
assert result["name"] == "EntityType"
assert result["kind"] == "ENUM"
assert result["description"] == "Types of entities in DataHub"
assert len(result["values"]) == 2
dataset_value = result["values"][0]
assert dataset_value["name"] == "DATASET"
assert dataset_value["description"] == "Dataset entity"
assert dataset_value["deprecated"] is False
legacy_value = result["values"][1]
assert legacy_value["name"] == "LEGACY_TYPE"
assert legacy_value["deprecated"] is True
assert legacy_value["deprecationReason"] == "Use DATASET instead"
def test_convert_operations_list_to_json(self):
"""Test converting full operations list to JSON format."""
schema = {
"queryType": {
"fields": [
{"name": "me", "description": "Get current user", "args": []},
{
"name": "search",
"description": "Search entities",
"args": [
{
"name": "query",
"type": {"kind": "SCALAR", "name": "String"},
}
],
},
]
},
"mutationType": {
"fields": [
{
"name": "addTag",
"description": "Add tag to entity",
"args": [
{
"name": "input",
"type": {
"kind": "NON_NULL",
"ofType": {
"kind": "INPUT_OBJECT",
"name": "TagInput",
},
},
}
],
}
]
},
}
result = _convert_operations_list_to_json(schema)
assert "schema" in result
assert "queries" in result["schema"]
assert "mutations" in result["schema"]
# Check queries
queries = result["schema"]["queries"]
assert len(queries) == 2
assert queries[0]["name"] == "me"
assert queries[0]["type"] == "Query"
assert queries[1]["name"] == "search"
# Check mutations
mutations = result["schema"]["mutations"]
assert len(mutations) == 1
assert mutations[0]["name"] == "addTag"
assert mutations[0]["type"] == "Mutation"
def test_convert_operations_list_to_json_empty_schema(self):
"""Test converting empty schema to JSON format."""
schema: dict[str, Any] = {}
result = _convert_operations_list_to_json(schema)
assert result == {"schema": {"queries": [], "mutations": []}}
def test_convert_describe_to_json_operation_only(self):
"""Test converting describe output with operation only."""
operation_info = (
{
"name": "searchAcrossEntities",
"description": "Search across all entity types",
"args": [],
},
"Query",
)
result = _convert_describe_to_json(operation_info, None, None)
assert "operation" in result
assert result["operation"]["name"] == "searchAcrossEntities"
assert result["operation"]["type"] == "Query"
assert "type" not in result
assert "relatedTypes" not in result
def test_convert_describe_to_json_type_only(self):
"""Test converting describe output with type only."""
type_info = {
"name": "SearchInput",
"kind": "INPUT_OBJECT",
"inputFields": [
{"name": "query", "type": {"kind": "SCALAR", "name": "String"}}
],
}
result = _convert_describe_to_json(None, type_info, None)
assert "type" in result
assert result["type"]["name"] == "SearchInput"
assert result["type"]["kind"] == "INPUT_OBJECT"
assert "operation" not in result
assert "relatedTypes" not in result
def test_convert_describe_to_json_with_related_types(self):
"""Test converting describe output with related types."""
operation_info = (
{
"name": "search",
"description": "Search operation",
"args": [
{
"name": "input",
"type": {"kind": "INPUT_OBJECT", "name": "SearchInput"},
}
],
},
"Query",
)
type_info = {"name": "SearchInput", "kind": "INPUT_OBJECT", "inputFields": []}
related_types = {
"SearchInput": {
"name": "SearchInput",
"kind": "INPUT_OBJECT",
"inputFields": [],
},
"FilterInput": {
"name": "FilterInput",
"kind": "INPUT_OBJECT",
"inputFields": [],
},
}
result = _convert_describe_to_json(operation_info, type_info, related_types)
assert "operation" in result
assert "type" in result
assert "relatedTypes" in result
assert len(result["relatedTypes"]) == 2
assert "SearchInput" in result["relatedTypes"]
assert "FilterInput" in result["relatedTypes"]
def test_convert_describe_to_json_all_none(self):
"""Test converting describe output when everything is None."""
result = _convert_describe_to_json(None, None, None)
assert result == {}
def test_json_formatting_preserves_structure(self):
"""Test that JSON formatting preserves all necessary structure for LLMs."""
# Complex operation with nested types
operation = {
"name": "complexSearch",
"description": "Complex search with multiple parameters",
"args": [
{
"name": "input",
"description": "Search input",
"type": {
"kind": "NON_NULL",
"ofType": {
"kind": "INPUT_OBJECT",
"name": "ComplexSearchInput",
},
},
},
{
"name": "options",
"description": "Search options",
"type": {
"kind": "LIST",
"ofType": {"kind": "ENUM", "name": "SearchOption"},
},
},
],
}
result = _convert_operation_to_json(operation, "Query")
# Verify complete structure is preserved
assert result["name"] == "complexSearch"
assert result["type"] == "Query"
assert result["description"] == "Complex search with multiple parameters"
assert len(result["arguments"]) == 2
# Verify nested type structure is preserved
input_arg = result["arguments"][0]
assert input_arg["required"] is True
assert input_arg["type"]["kind"] == "NON_NULL"
assert input_arg["type"]["ofType"]["kind"] == "INPUT_OBJECT"
assert input_arg["type"]["ofType"]["name"] == "ComplexSearchInput"
options_arg = result["arguments"][1]
assert options_arg["required"] is False
assert options_arg["type"]["kind"] == "LIST"
assert options_arg["type"]["ofType"]["kind"] == "ENUM"
assert options_arg["type"]["ofType"]["name"] == "SearchOption"
class TestCoverageImprovementTargets:
"""Test specific uncovered code paths to improve coverage."""
def test_parse_graphql_operations_file_not_found_error(self):
"""Test error when schema files cannot be found."""
with patch(
"datahub.cli.graphql_cli._get_schema_files_path",
side_effect=FileNotFoundError,
):
with pytest.raises(click.ClickException) as exc_info:
_parse_graphql_operations_from_files("/nonexistent/path")
assert "Schema loading failed" in str(exc_info.value)
class TestCLIArgumentValidationAndEdgeCases:
"""Test CLI argument validation and edge case handling."""
def test_invalid_operation_name_handling(self):
"""Test handling of invalid operation names."""
# Test with empty operations list
result = _format_operation_list([], "query")
assert "No query operations found" in result
def test_parse_variables_malformed_json(self):
"""Test handling of malformed variable JSON."""
# Test malformed JSON variables
from click.exceptions import ClickException
with pytest.raises(
ClickException
): # Should raise ClickException on JSON parse error
_parse_variables('{"malformed": json')
def test_parse_variables_empty_input(self):
"""Test handling of empty variable input."""
result = _parse_variables("")
assert result is None
result = _parse_variables(None)
assert result is None
def test_output_format_validation_edge_cases(self):
"""Test output format validation with edge cases."""
# Test with empty operations list for mutations
result = _format_operation_list([], "mutation")
assert "No mutation operations found" in result
class TestComplexTypeResolutionScenarios:
"""Test complex type resolution scenarios for better coverage."""
def test_deeply_nested_type_resolution(self):
"""Test deeply nested type structures."""
base_type = _extract_base_type_name(
{
"kind": "NON_NULL",
"ofType": {
"kind": "LIST",
"ofType": {
"kind": "NON_NULL",
"ofType": {"kind": "SCALAR", "name": "String"},
},
},
}
)
assert base_type == "String"
def test_unknown_type_kind_handling(self):
"""Test handling of unknown type kinds."""
# Test with unknown/unsupported type kind
result = _extract_base_type_name({"kind": "UNKNOWN_KIND", "name": "SomeType"})
assert result == "SomeType" # Should fallback to name
def test_type_conversion_edge_cases(self):
"""Test type conversion edge cases for JSON output."""
# Test type with missing optional fields
type_info = {
"kind": "OBJECT",
"name": "IncompleteType",
# Missing fields, description, etc.
}
result = _convert_type_to_json(type_info)
assert result["name"] == "IncompleteType"
assert result["kind"] == "OBJECT"
# Function only returns basic structure, not fields
def test_fetch_type_recursive_with_visited_set(self):
"""Test recursive type fetching with visited set."""
schema = {
"types": [
{
"name": "TestType",
"kind": "OBJECT",
"fields": [
{"name": "field1", "type": {"kind": "SCALAR", "name": "String"}}
],
}
]
}
visited: set[str] = set()
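        # Note: a plain schema dict is passed where other tests pass a graph client;
        # the helper's error handling (see test_fetch_type_recursive_error_handling)
        # presumably turns the resulting attribute error into an empty mapping, which
        # still satisfies the "is not None" assertion below.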
result = _fetch_type_recursive(schema, "TestType", visited)
assert result is not None
class TestAdvancedJSONOutputFormatting:
"""Test advanced JSON output formatting edge cases."""
def test_convert_describe_to_json_with_none_inputs(self):
"""Test describe functionality with None inputs."""
# This should handle the case where inputs are None gracefully
result = _convert_describe_to_json(None, None)
# Should return something (empty dict or error info)
assert isinstance(result, dict)
def test_operation_list_conversion_with_empty_schema(self):
"""Test operation list conversion with minimal schema."""
schema: dict[str, Any] = {
"queryType": {"fields": []},
"mutationType": {"fields": []},
}
result = _convert_operations_list_to_json(schema)
assert isinstance(result, dict)
assert "schema" in result
def test_json_output_with_special_characters(self):
"""Test JSON output handling of special characters."""
operation = {
"name": "test_with_ñéw_chars",
"description": "Test with special chars: <>&\"'",
"args": [],
}
result = _convert_operation_to_json(operation, "Query")
assert result["name"] == "test_with_ñéw_chars"
assert "<>&" in result["description"]
def test_dict_to_graphql_input_edge_cases(self):
"""Test dictionary to GraphQL input conversion with edge cases."""
# Test empty dict
result = _dict_to_graphql_input({})
assert result == "{}"
# Test nested structure
nested = {"outer": {"inner": "value", "number": 42}}
result = _dict_to_graphql_input(nested)
assert "outer" in result
assert "inner" in result


class TestMainCLIFunction:
"""Test the main graphql() CLI function to improve coverage of CLI entry points."""

    def test_graphql_list_operations_mode(self):
"""Test --list-operations CLI mode."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._get_schema_via_introspection"
) as mock_schema,
patch("datahub.cli.graphql_cli._handle_list_operations") as mock_handler,
):
mock_schema.return_value = {"test": "schema"}
# Call CLI command with --list-operations
result = runner.invoke(graphql, ["--list-operations", "--format", "human"])
# Should execute successfully
assert result.exit_code == 0
# Verify the correct handler was called
mock_handler.assert_called_once()

    def test_graphql_list_queries_mode(self):
"""Test --list-queries CLI mode."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._get_schema_via_introspection"
) as mock_schema,
patch("datahub.cli.graphql_cli._handle_list_queries") as mock_handler,
):
mock_schema.return_value = {"test": "schema"}
# Call CLI command with --list-queries
result = runner.invoke(graphql, ["--list-queries", "--format", "json"])
# Should execute successfully
assert result.exit_code == 0
# Verify the correct handler was called
mock_handler.assert_called_once()

    def test_graphql_list_mutations_mode(self):
"""Test --list-mutations CLI mode."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._get_schema_via_introspection"
) as mock_schema,
patch("datahub.cli.graphql_cli._handle_list_mutations") as mock_handler,
):
mock_schema.return_value = {"test": "schema"}
# Call CLI command with --list-mutations and --no-pretty
result = runner.invoke(
graphql, ["--list-mutations", "--no-pretty", "--format", "human"]
)
# Should execute successfully
assert result.exit_code == 0
# Verify the correct handler was called
mock_handler.assert_called_once()

    def test_graphql_describe_mode(self):
"""Test --describe CLI mode with schema introspection."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._get_schema_via_introspection"
) as mock_schema,
patch("datahub.cli.graphql_cli._handle_describe") as mock_handler,
):
mock_schema.return_value = {"test": "schema"}
# Call CLI command with --describe and --recurse (no schema path)
result = runner.invoke(
graphql,
[
"--describe",
"TestType",
"--recurse",
"--format",
"json",
],
)
# Should execute successfully
assert result.exit_code == 0
# Verify the correct handler was called
mock_handler.assert_called_once()
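
    # Hedged variant of the describe flow above without --recurse; assumes the
    # same _handle_describe handler is dispatched for the non-recursive case.
    def test_graphql_describe_mode_without_recurse_sketch(self):
        """Sketch: --describe without --recurse should use the same handler."""
        runner = CliRunner()
        with (
            patch("datahub.cli.graphql_cli.get_default_graph"),
            patch(
                "datahub.cli.graphql_cli._get_schema_via_introspection"
            ) as mock_schema,
            patch("datahub.cli.graphql_cli._handle_describe") as mock_handler,
        ):
            mock_schema.return_value = {"test": "schema"}
            result = runner.invoke(
                graphql, ["--describe", "TestType", "--format", "human"]
            )
            assert result.exit_code == 0
            mock_handler.assert_called_once()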

    def test_graphql_query_execution_mode(self):
"""Test query execution mode."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch("datahub.cli.graphql_cli._execute_query") as mock_execute,
):
mock_execute.return_value = {"data": {"test": "result"}}
# Call CLI command with --query
result = runner.invoke(
graphql, ["--query", "{ __typename }", "--format", "json"]
)
# Should execute successfully
assert result.exit_code == 0
# Verify query execution was called
mock_execute.assert_called_once()

    def test_graphql_operation_execution_mode(self):
"""Test operation execution mode."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch("datahub.cli.graphql_cli._execute_operation") as mock_execute,
):
mock_execute.return_value = {"data": {"test": "result"}}
# Call CLI command with --operation and --variables
result = runner.invoke(
graphql,
[
"--operation",
"testOperation",
"--variables",
'{"var": "value"}',
"--format",
"json",
],
)
# Should execute successfully
assert result.exit_code == 0
# Verify operation execution was called
mock_execute.assert_called_once()

    def test_graphql_both_queries_and_mutations_list(self):
"""Test listing both queries and mutations together."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._get_schema_via_introspection"
) as mock_schema,
patch("datahub.cli.graphql_cli._handle_list_operations") as mock_handler,
):
mock_schema.return_value = {"test": "schema"}
# Call CLI command with both --list-queries and --list-mutations
result = runner.invoke(
graphql, ["--list-queries", "--list-mutations", "--format", "human"]
)
# Should execute successfully
assert result.exit_code == 0
# Should call list_operations handler when both are true
mock_handler.assert_called_once()

    def test_graphql_with_custom_schema_path(self):
"""Test CLI with custom schema path."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._parse_graphql_operations_from_files"
) as mock_parse,
patch("datahub.cli.graphql_cli._handle_list_operations"),
):
mock_parse.return_value = {"test": "schema"}
# Call CLI command with custom --schema-path
result = runner.invoke(
graphql,
[
"--list-operations",
"--schema-path",
"/custom/schema/path",
"--format",
"json",
],
)
# Should execute successfully
assert result.exit_code == 0
# Verify schema was loaded with custom path
mock_parse.assert_called_once_with("/custom/schema/path")


class TestCLIFilePathHandling:
"""Test CLI file path handling and schema discovery to improve coverage."""

    def test_graphql_query_with_file_path(self):
"""Test CLI with query from file path."""
runner = CliRunner()
with tempfile.NamedTemporaryFile(
mode="w", suffix=".graphql", delete=False
) as f:
f.write("{ __typename }")
temp_path = f.name
try:
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch("datahub.cli.graphql_cli._execute_query") as mock_execute,
):
mock_execute.return_value = {"data": {"test": "result"}}
# Call CLI command with file path as query
result = runner.invoke(
graphql, ["--query", temp_path, "--format", "json"]
)
# Should execute successfully
assert result.exit_code == 0
# Should execute the query loaded from file
mock_execute.assert_called_once()
finally:
os.unlink(temp_path)

    def test_graphql_variables_with_file_path(self):
"""Test CLI with variables from file path."""
runner = CliRunner()
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
json.dump({"testVar": "testValue"}, f)
temp_path = f.name
try:
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch("datahub.cli.graphql_cli._execute_query") as mock_execute,
):
mock_execute.return_value = {"data": {"test": "result"}}
# Call CLI command with file path as variables
result = runner.invoke(
graphql,
[
"--query",
"{ __typename }",
"--variables",
temp_path,
"--format",
"json",
],
)
# Should execute successfully
assert result.exit_code == 0
# Should execute with variables loaded from file
mock_execute.assert_called_once()
finally:
os.unlink(temp_path)
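
    # Hedged composite sketch: reuses the two file-path flows above in a single
    # invocation (query from file plus variables from file); assumes the two
    # loaders compose without any additional flags.
    def test_graphql_query_and_variables_from_files_sketch(self):
        """Sketch: query file and variables file in the same invocation."""
        runner = CliRunner()
        with tempfile.TemporaryDirectory() as temp_dir:
            query_path = Path(temp_dir) / "query.graphql"
            query_path.write_text("{ __typename }")
            vars_path = Path(temp_dir) / "vars.json"
            vars_path.write_text(json.dumps({"testVar": "testValue"}))
            with (
                patch("datahub.cli.graphql_cli.get_default_graph"),
                patch("datahub.cli.graphql_cli._execute_query") as mock_execute,
            ):
                mock_execute.return_value = {"data": {"test": "result"}}
                result = runner.invoke(
                    graphql,
                    [
                        "--query",
                        str(query_path),
                        "--variables",
                        str(vars_path),
                        "--format",
                        "json",
                    ],
                )
                # Both file inputs should be loaded and the query executed.
                assert result.exit_code == 0
                mock_execute.assert_called_once()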


class TestCLIOutputFormatting:
"""Test CLI output formatting and pretty-printing to improve coverage."""

    def test_json_output_format(self):
"""Test JSON output format handling."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch("datahub.cli.graphql_cli._execute_query") as mock_execute,
):
mock_execute.return_value = {"data": {"test": "result"}}
# Call CLI command with JSON format
result = runner.invoke(
graphql, ["--query", "{ __typename }", "--format", "json"]
)
# Should execute successfully
assert result.exit_code == 0
# Should have some output
assert len(result.output) > 0

    def test_human_output_format(self):
"""Test human-readable output format handling."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch("datahub.cli.graphql_cli._execute_query") as mock_execute,
):
mock_execute.return_value = {"data": {"test": "result"}}
# Call CLI command with human format
result = runner.invoke(
graphql, ["--query", "{ __typename }", "--format", "human"]
)
# Should execute successfully
assert result.exit_code == 0
# Should have some output
assert len(result.output) > 0

    def test_no_pretty_flag_handling(self):
"""Test --no-pretty flag processing."""
runner = CliRunner()
with (
patch("datahub.cli.graphql_cli.get_default_graph"),
patch(
"datahub.cli.graphql_cli._get_schema_via_introspection"
) as mock_schema,
patch("datahub.cli.graphql_cli._handle_list_operations") as mock_handler,
):
mock_schema.return_value = {"test": "schema"}
# Call CLI command with --no-pretty flag
result = runner.invoke(
graphql, ["--list-operations", "--no-pretty", "--format", "json"]
)
# Should execute successfully
assert result.exit_code == 0
# Handler should be called
mock_handler.assert_called_once()