mirror of
				https://github.com/datahub-project/datahub.git
				synced 2025-10-31 18:59:23 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			105 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			105 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from datetime import datetime
 | |
| from unittest.mock import MagicMock
 | |
| 
 | |
| 
 | |
def mock_temp_table_cursor(cursor: MagicMock) -> None:
    """Prime ``cursor`` with the canned result of the temp-table DDL query.

    ``cursor.description`` is set to the five result columns, and
    ``cursor.fetchmany.side_effect`` is set to two batches: one holding a
    single ``CREATE TABLE #player_price`` row, then an empty one.

    Args:
        cursor: The mock cursor to configure in place.
    """
    column_names = (
        "transaction_id",
        "session_id",
        "query_text",
        "create_command",
        "start_time",
    )
    # Each description entry is a one-element list holding the column name.
    cursor.description = [[name] for name in column_names]

    create_statement = (
        "CREATE TABLE #player_price distkey(player_id) AS SELECT player_id, SUM(price) AS "
        "price_usd from player_activity group by player_id"
    )
    # Values are positional and follow the order of ``column_names``.
    temp_table_row = (
        126,
        "abc",
        create_statement,
        "CREATE TABLE #player_price",
        datetime.now(),
    )

    first_batch = [temp_table_row]
    # Empty result to stop the while loop
    final_batch: list = []
    cursor.fetchmany.side_effect = [first_batch, final_batch]
 | |
| 
 | |
| 
 | |
def mock_stl_insert_table_cursor(cursor: MagicMock) -> None:
    """Prime ``cursor`` with a canned insert-lineage result.

    ``cursor.description`` is set to the five result columns, and
    ``cursor.fetchmany.side_effect`` is set to two batches: one holding a
    single row that inserts from ``#player_price`` into
    ``player_price_with_hike_v6``, then an empty one.

    Args:
        cursor: The mock cursor to configure in place.
    """
    column_names = (
        "source_schema",
        "source_table",
        "target_schema",
        "target_table",
        "ddl",
    )
    # Each description entry is a one-element list holding the column name.
    cursor.description = [[name] for name in column_names]

    insert_ddl = (
        "INSERT INTO player_price_with_hike_v6 SELECT (price_usd + 0.2 * price_usd) as price, '20%' FROM "
        "#player_price"
    )
    # Values are positional and follow the order of ``column_names``.
    lineage_row = (
        "public",
        "#player_price",
        "public",
        "player_price_with_hike_v6",
        insert_ddl,
    )

    first_batch = [lineage_row]
    # Empty result to stop the while loop
    final_batch: list = []
    cursor.fetchmany.side_effect = [first_batch, final_batch]
 | |
| 
 | |
| 
 | |
# Maps a query string to the function that primes a mock cursor with the
# canned result for that query. `mock_cursor` looks the query up with a plain
# dict access, so a key must match the incoming query text character for
# character (whitespace, newlines and backslash escapes included).
# NOTE(review): presumably the keys mirror the SQL issued by the code under
# test -- confirm against the query builder whenever that SQL changes.
query_vs_cursor_mocker = {
    # The query that labels itself "DataHub Redshift Source temp table DDL
    # query"; it is split across adjacent literals only to keep lines short.
    (
        "-- DataHub Redshift Source temp table DDL query\n            select\n                *\n            "
        "from\n                (\n                select\n                    session_id,\n                  "
        "  transaction_id,\n                    start_time,\n                    userid,\n                   "
        " REGEXP_REPLACE(REGEXP_SUBSTR(REGEXP_REPLACE(query_text,'\\\\\\\\n','\\\\n'), '(CREATE(?:["
        "\\\\n\\\\s\\\\t]+(?:temp|temporary))?(?:[\\\\n\\\\s\\\\t]+)table(?:[\\\\n\\\\s\\\\t]+)["
        "^\\\\n\\\\s\\\\t()-]+)', 0, 1, 'ipe'),'[\\\\n\\\\s\\\\t]+',' ',1,'p') as create_command,\n          "
        "          query_text,\n                    row_number() over (\n                        partition "
        "by session_id, TRIM(query_text)\n                        order by start_time desc\n                    ) rn\n   "
        "             from\n                    (\n                    select\n                        pid "
        "as session_id,\n                        xid as transaction_id,\n                        starttime "
        "as start_time,\n                        type,\n                        query_text,\n                "
        "        userid\n                    from\n                        (\n                        "
        "select\n                            starttime,\n                            pid,\n                  "
        "          xid,\n                            type,\n                            userid,\n            "
        "                LISTAGG(case\n                                when LEN(RTRIM(text)) = 0 then text\n "
        "                               else RTRIM(text)\n                            end,\n                 "
        "           '') within group (\n                                order by sequence\n                  "
        "          ) as query_text\n                        from\n                            "
        "SVL_STATEMENTTEXT\n                        where\n                            type in ('DDL', "
        "'QUERY')\n                            AND        starttime >= '2024-01-01 12:00:00'\n               "
        "             AND        starttime < '2024-01-10 12:00:00'\n                            -- See "
        "https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl"
        "-statementtext\n                            AND sequence < 320\n                        group by\n  "
        "                          starttime,\n                            pid,\n                            "
        "xid,\n                            type,\n                            userid\n                       "
        " order by\n                            starttime,\n                            pid,\n               "
        "             xid,\n                            type,\n                            userid\n          "
        "                  asc)\n                    where\n                        type in ('DDL', "
        "'QUERY')\n                )\n                where\n                    (create_command ilike "
        "'create temp table %'\n                        or create_command ilike 'create temporary table %'\n "
        "                       -- we want to get all the create table statements and not just temp tables "
        "if non temp table is created and dropped in the same transaction\n                        or "
        "create_command ilike 'create table %')\n                    -- Redshift creates temp tables with "
        "the following names: volt_tt_%. We need to filter them out.\n                    and query_text not "
        "ilike 'CREATE TEMP TABLE volt_tt_%'\n                    and create_command not like 'CREATE TEMP "
        "TABLE volt_tt_'\n                    -- We need to filter out our query and it was not possible "
        "earlier when we did not have any comment in the query\n                    and query_text not ilike "
        "'%https://stackoverflow.com/questions/72770890/redshift-result-size-exceeds-listagg-limit-on-svl"
        "-statementtext%'\n\n            )\n            where\n                rn = 1\n            "
    ): mock_temp_table_cursor,
    # Short fixed query string paired with the insert-lineage cursor setup.
    "select * from test_collapse_temp_lineage": mock_stl_insert_table_cursor,
}
 | |
| 
 | |
| 
 | |
def mock_cursor(cursor: MagicMock, query: str) -> None:
    """Configure ``cursor`` with the canned result registered for ``query``.

    Args:
        cursor: The mock cursor to configure in place.
        query: Exact query text; it must be a key of
            ``query_vs_cursor_mocker``.

    Raises:
        KeyError: If no setup function is registered for ``query``.
    """
    configure_cursor = query_vs_cursor_mocker[query]
    configure_cursor(cursor=cursor)
 | 
