"""Unit tests for ``TableauSource._clean_tableau_query_parameters``.

The assertions below pin two behaviors of the helper:

* doubled comparison operators (``<<``, ``>>``, ``<<=``, ``>>=``) come back
  as their single form (``<``, ``>``, ``<=``, ``>=``), while operators that
  are already single are left untouched;
* Tableau parameter placeholders (``<Parameters.Name>`` and its bracketed
  variants) come back as the literal ``1``.
"""

import pytest

from datahub.ingestion.source.tableau import TableauSource


def test_tableau_source_unescapes_lt():
    """``<<`` is rewritten to ``<``."""
    res = TableauSource._clean_tableau_query_parameters(
        "select * from t where c1 << 135"
    )

    assert res == "select * from t where c1 < 135"


def test_tableau_source_unescapes_gt():
    """``>>`` is rewritten to ``>``."""
    res = TableauSource._clean_tableau_query_parameters(
        "select * from t where c1 >> 135"
    )

    assert res == "select * from t where c1 > 135"


def test_tableau_source_unescapes_gte():
    """``>>=`` is rewritten to ``>=``."""
    res = TableauSource._clean_tableau_query_parameters(
        "select * from t where c1 >>= 135"
    )

    assert res == "select * from t where c1 >= 135"


def test_tableau_source_unescapeslgte():
    """``<<=`` is rewritten to ``<=``."""
    res = TableauSource._clean_tableau_query_parameters(
        "select * from t where c1 <<= 135"
    )

    assert res == "select * from t where c1 <= 135"


def test_tableau_source_doesnt_touch_not_escaped():
    """Single ``<`` / ``>`` operators pass through unchanged."""
    res = TableauSource._clean_tableau_query_parameters(
        "select * from t where c1 < 135 and c2 > 15"
    )

    assert res == "select * from t where c1 < 135 and c2 > 15"


# Placeholder spellings exercised by every parametrized test below: the cross
# product of {Parameters, [Parameters]} x {bare name, [bracketed name]}, each
# with a plain name, a name with underscores, a name with spaces, and a name
# full of punctuation. The last two entries put "<>" inside a bracketed name.
TABLEAU_PARAMS = [
    "<Parameters.MyParam>",
    "<Parameters.MyParam_1>",
    "<Parameters.My Param _ 1>",
    "<Parameters.My Param 1 !@\"',.#$%^:;&*()-_+={}|\\ /<>",
    "<[Parameters].MyParam>",
    "<[Parameters].MyParam_1>",
    "<[Parameters].My Param _ 1>",
    "<[Parameters].My Param 1 !@\"',.#$%^:;&*()-_+={}|\\ /<>",
    "<Parameters.[MyParam]>",
    "<Parameters.[MyParam_1]>",
    "<Parameters.[My Param _ 1]>",
    "<Parameters.[My Param 1 !@\"',.#$%^:;&*()-_+={}|\\ /<]>",
    "<[Parameters].[MyParam]>",
    "<[Parameters].[MyParam_1]>",
    "<[Parameters].[My Param _ 1]>",
    "<[Parameters].[My Param 1 !@\"',.#$%^:;&*()-_+={}|\\ /<]>",
    "<Parameters.[My Param 1 !@\"',.#$%^:;&*()-_+={}|\\ /<>]>",
    "<[Parameters].[My Param 1 !@\"',.#$%^:;&*()-_+={}|\\ /<>]>",
]


@pytest.mark.parametrize("p", TABLEAU_PARAMS)
def test_tableau_source_cleanups_tableau_parameters_in_equi_predicates(p):
    """Placeholders on the right-hand side of ``=`` become ``1``."""
    assert (
        TableauSource._clean_tableau_query_parameters(
            f"select * from t where c1 = {p} and c2 = {p} and c3 = 7"
        )
        == "select * from t where c1 = 1 and c2 = 1 and c3 = 7"
    )


@pytest.mark.parametrize("p", TABLEAU_PARAMS)
def test_tableau_source_cleanups_tableau_parameters_in_lt_gt_predicates(p):
    """Placeholders next to ``<<`` / ``>>`` are replaced and the operator un-doubled.

    Covers the placeholder on either side of the operator, with and without
    surrounding whitespace.
    """
    assert (
        TableauSource._clean_tableau_query_parameters(
            f"select * from t where c1 << {p} and c2<<{p} and c3 >> {p} and c4>>{p} or {p} >> c1 and {p}>>c2 and {p} << c3 and {p}<<c4"
        )
        == "select * from t where c1 < 1 and c2<1 and c3 > 1 and c4>1 or 1 > c1 and 1>c2 and 1 < c3 and 1<c4"
    )


@pytest.mark.parametrize("p", TABLEAU_PARAMS)
def test_tableau_source_cleanups_tableau_parameters_in_lte_gte_predicates(p):
    """Placeholders next to ``<<=`` / ``>>=`` are replaced and the operator un-doubled.

    Covers the placeholder on either side of the operator, with and without
    surrounding whitespace.
    """
    assert (
        TableauSource._clean_tableau_query_parameters(
            f"select * from t where c1 <<= {p} and c2<<={p} and c3 >>= {p} and c4>>={p} or {p} >>= c1 and {p}>>=c2 and {p} <<= c3 and {p}<<=c4"
        )
        == "select * from t where c1 <= 1 and c2<=1 and c3 >= 1 and c4>=1 or 1 >= c1 and 1>=c2 and 1 <= c3 and 1<=c4"
    )


@pytest.mark.parametrize("p", TABLEAU_PARAMS)
def test_tableau_source_cleanups_tableau_parameters_in_join_predicate(p):
    """Placeholders inside a join ``on`` clause become ``1``."""
    assert (
        TableauSource._clean_tableau_query_parameters(
            f"select * from t1 inner join t2 on t1.id = t2.id and t2.c21 = {p} and t1.c11 = 123 + {p}"
        )
        == "select * from t1 inner join t2 on t1.id = t2.id and t2.c21 = 1 and t1.c11 = 123 + 1"
    )


@pytest.mark.parametrize("p", TABLEAU_PARAMS)
def test_tableau_source_cleanups_tableau_parameters_in_complex_expressions(p):
    """Placeholders become ``1`` as function arguments and arithmetic operands.

    The single ``>`` in the expression must survive unchanged.
    """
    assert (
        TableauSource._clean_tableau_query_parameters(
            f"select myudf1(c1, {p}, c2) / myudf2({p}) > ({p} + 3 * {p} * c5) * {p} - c4"
        )
        == "select myudf1(c1, 1, c2) / myudf2(1) > (1 + 3 * 1 * c5) * 1 - c4"
    )


@pytest.mark.parametrize("p", TABLEAU_PARAMS)
def test_tableau_source_cleanups_tableau_parameters_in_udfs(p):
    """A placeholder that is the sole argument of a UDF call becomes ``1``."""
    assert (
        TableauSource._clean_tableau_query_parameters(f"select myudf({p}) from t")
        == "select myudf(1) from t"
    )