feat: Support CSV ingestion through the UI (#9280)

Co-authored-by: Gabe Lyons <itsgabelyons@gmail.com>
purnimagarg1 2023-12-04 22:58:41 +05:30 committed by GitHub
parent 7857944bb5
commit f9b24e0724
10 changed files with 143 additions and 2 deletions

View File

@@ -0,0 +1,27 @@
import React from 'react';
import { Alert } from 'antd';
const CSV_FORMAT_LINK = 'https://datahubproject.io/docs/generated/ingestion/sources/csv';
export const CSVInfo = () => {
const link = (
<a href={CSV_FORMAT_LINK} target="_blank" rel="noopener noreferrer">
link
</a>
);
return (
<Alert
style={{ marginBottom: '10px' }}
type="warning"
banner
message={
<>
Add the URL of your CSV file to be ingested. This will work for any web-hosted CSV file. For
example, you can create a file in Google Sheets following the format at this {link} and then
construct the CSV URL by publishing your Google Sheet in CSV format.
</>
}
/>
);
};
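
The banner above asks for a web-hosted CSV URL. As a sketch (not part of the commit), these example values mirror the placeholder recipe added later in this change; the hosts and IDs are illustrative only.

// Sketch only – illustrative URLs, not part of the commit.
// Any directly fetchable CSV works; for Google Sheets, publishing the sheet
// in CSV format yields an export-style URL like the one in the placeholder recipe.
const EXAMPLE_CSV_URLS = [
    'https://raw.githubusercontent.com/<org>/<repo>/main/metadata.csv', // any web-hosted CSV file
    'https://docs.google.com/spreadsheets/d/DOCID/export?format=csv', // a Google Sheet published as CSV
];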

View File

@@ -7,8 +7,9 @@ import { ANTD_GRAY } from '../../../entity/shared/constants';
import { YamlEditor } from './YamlEditor';
import RecipeForm from './RecipeForm/RecipeForm';
import { SourceBuilderState, SourceConfig } from './types';
import { LOOKER, LOOK_ML } from './constants';
import { CSV, LOOKER, LOOK_ML } from './constants';
import { LookerWarning } from './LookerWarning';
import { CSVInfo } from './CSVInfo';
export const ControlsContainer = styled.div`
display: flex;
@@ -81,6 +82,8 @@ function RecipeBuilder(props: Props) {
return (
<div>
{(type === LOOKER || type === LOOK_ML) && <LookerWarning type={type} />}
{type === CSV && <CSVInfo />}
<HeaderContainer>
<Title style={{ marginBottom: 0 }} level={5}>
{sourceConfigs?.displayName} Recipe

View File

@@ -83,7 +83,7 @@ import {
PROJECT_NAME,
} from './lookml';
import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto';
import { BIGQUERY_BETA, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants';
import { BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants';
import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta';
import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql';
import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql';
@@ -140,6 +140,7 @@ import {
INCLUDE_VIEW_LINEAGE,
INCLUDE_PROJECTIONS_LINEAGE,
} from './vertica';
import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILE_URL, CSV_WRITE_SEMANTICS } from './csv';
export enum RecipeSections {
Connection = 0,
@@ -453,6 +454,11 @@ export const RECIPE_FIELDS: RecipeFields = {
],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables, Views and Projections from ingestion.',
},
[CSV]: {
fields: [CSV_FILE_URL],
filterFields: [],
advancedFields: [CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS],
},
};
export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS));
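
As a sketch (not part of the commit), this is how the new CSV entry surfaces through the exports above when the ingestion UI decides whether to render the structured recipe form; the import paths are assumptions for illustration.

// Sketch only – import paths are assumed; names match the exports in this file.
import { RECIPE_FIELDS, CONNECTORS_WITH_FORM } from './constants';
import { CSV } from '../constants';

// CSV now gets the structured recipe form instead of only the raw YAML editor.
console.log(CONNECTORS_WITH_FORM.has(CSV)); // true

const csvEntry = RECIPE_FIELDS[CSV];
console.log(csvEntry.fields.map((f) => f.name)); // ['filename']
console.log(csvEntry.advancedFields.map((f) => f.name)); // ['array_delimiter', 'delimiter', 'write_semantics']
console.log(csvEntry.filterFields.length); // 0 – no filter section for CSV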

View File

@@ -0,0 +1,60 @@
import { RecipeField, FieldType } from './common';
const validateURL = (fieldName) => {
return {
validator(_, value) {
const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/);
const isURLValid = URLPattern.test(value);
if (!value || isURLValid) {
return Promise.resolve();
}
return Promise.reject(new Error(`A valid ${fieldName} is required.`));
},
};
};
export const CSV_FILE_URL: RecipeField = {
name: 'filename',
label: 'File URL',
tooltip: 'File URL of the CSV file to ingest.',
type: FieldType.TEXT,
fieldPath: 'source.config.filename',
placeholder: 'File URL',
required: true,
rules: [() => validateURL('File URL')],
};
export const CSV_ARRAY_DELIMITER: RecipeField = {
name: 'array_delimiter',
label: 'Array delimiter',
tooltip: 'Delimiter to use when parsing array fields (tags, terms and owners)',
type: FieldType.TEXT,
fieldPath: 'source.config.array_delimiter',
placeholder: 'Array delimiter',
rules: null,
};
export const CSV_DELIMITER: RecipeField = {
name: 'delimiter',
label: 'Delimiter',
tooltip: 'Delimiter to use when parsing CSV',
type: FieldType.TEXT,
fieldPath: 'source.config.delimiter',
placeholder: 'Delimiter',
rules: null,
};
export const CSV_WRITE_SEMANTICS: RecipeField = {
name: 'write_semantics',
label: 'Write Semantics',
tooltip:
'Whether the new tags, terms, and owners being added should override the existing ones previously added by this source. Allowed values: "PATCH" or "OVERRIDE".',
type: FieldType.SELECT,
options: [
{ label: 'PATCH', value: 'PATCH' },
{ label: 'OVERRIDE', value: 'OVERRIDE' },
],
fieldPath: 'source.config.write_semantics',
placeholder: 'Write Semantics',
rules: null,
};
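
The validateURL helper at the top of this file drives validation of the File URL field. As a sketch (not part of the commit), these example inputs show what its pattern accepts and rejects; the values are illustrative.

// Sketch only – sanity-checking the validator's URL pattern with example inputs.
const URLPattern = /^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/;

console.log(URLPattern.test('https://docs.google.com/spreadsheets/d/DOCID/export?format=csv')); // true
console.log(URLPattern.test('example.com/data.csv')); // true – the http(s):// prefix is optional
console.log(URLPattern.test('not a csv url')); // false – whitespace never matches, so the rule rejects it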

View File

@@ -30,6 +30,7 @@ import verticaLogo from '../../../../images/verticalogo.png';
import mlflowLogo from '../../../../images/mlflowlogo.png';
import dynamodbLogo from '../../../../images/dynamodblogo.png';
import fivetranLogo from '../../../../images/fivetranlogo.png';
import csvLogo from '../../../../images/csv-logo.png';
export const ATHENA = 'athena';
export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
@@ -108,6 +109,8 @@ export const VERTICA = 'vertica';
export const VERTICA_URN = `urn:li:dataPlatform:${VERTICA}`;
export const FIVETRAN = 'fivetran';
export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`;
export const CSV = 'csv-enricher';
export const CSV_URN = `urn:li:dataPlatform:${CSV}`;
export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
@@ -142,6 +145,7 @@ export const PLATFORM_URN_TO_LOGO = {
[UNITY_CATALOG_URN]: databricksLogo,
[VERTICA_URN]: verticaLogo,
[FIVETRAN_URN]: fivetranLogo,
[CSV_URN]: csvLogo,
};
export const SOURCE_TO_PLATFORM_URN = {

View File

@@ -223,6 +223,13 @@
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/",
"recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV"
},
{
"urn": "urn:li:dataPlatform:csv-enricher",
"name": "csv-enricher",
"displayName": "CSV",
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv'",
"recipe": "source: \n type: csv-enricher \n config: \n # URL of your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH"
},
{
"urn": "urn:li:dataPlatform:custom",
"name": "custom",

View File

@@ -0,0 +1,22 @@
import { SourceConfig } from '../types';
import csvLogo from '../../../../../images/csv-logo.png';
const placeholderRecipe = `\
source:
type: csv-enricher
config:
filename: # URL of your csv file to ingest, e.g. https://docs.google.com/spreadsheets/d/DOCID/export?format=csv
array_delimiter: '|'
delimiter: ','
write_semantics: PATCH
`;
const csvConfig: SourceConfig = {
type: 'csv-enricher',
placeholderRecipe,
displayName: 'CSV',
docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv',
logoUrl: csvLogo,
};
export default csvConfig;

View File

@@ -16,6 +16,7 @@ import { SourceConfig } from './types';
import hiveConfig from './hive/hive';
import oracleConfig from './oracle/oracle';
import tableauConfig from './tableau/tableau';
import csvConfig from './csv/csv';
const baseUrl = window.location.origin;
@@ -46,6 +47,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array<SourceConfig> = [
glueConfig,
oracleConfig,
hiveConfig,
csvConfig,
{
type: 'custom',
placeholderRecipe: DEFAULT_PLACEHOLDER_RECIPE,

Binary file not shown (new image, 12 KiB).

View File

@@ -574,5 +574,15 @@
"type": "OTHERS",
"logoUrl": "/assets/platforms/fivetranlogo.png"
}
},
{
"urn": "urn:li:dataPlatform:csv",
"aspect": {
"datasetNameDelimiter": ".",
"name": "csv",
"displayName": "CSV",
"type": "OTHERS",
"logoUrl": "/assets/platforms/csv-logo.png"
}
}
]