mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-12 10:35:51 +00:00
feat: Support CSV ingestion through the UI (#9280)
Co-authored-by: Gabe Lyons <itsgabelyons@gmail.com>
This commit is contained in:
parent
7857944bb5
commit
f9b24e0724
27
datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx
Normal file
27
datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx
Normal file
@ -0,0 +1,27 @@
|
||||
import React from 'react';
|
||||
import { Alert } from 'antd';
|
||||
|
||||
const CSV_FORMAT_LINK = 'https://datahubproject.io/docs/generated/ingestion/sources/csv';
|
||||
|
||||
/**
 * Banner explaining how to supply a CSV file for ingestion.
 *
 * Renders an antd `Alert` in banner mode containing guidance text and a link to
 * `CSV_FORMAT_LINK`, which opens in a new tab.
 */
export const CSVInfo = () => {
    // rel="noopener noreferrer" keeps the newly opened tab from reaching back into this window.
    const link = (
        <a href={CSV_FORMAT_LINK} target="_blank" rel="noopener noreferrer">
            link
        </a>
    );

    return (
        <Alert
            style={{ marginBottom: '10px' }}
            type="warning"
            banner
            message={
                <>
                    Add the URL of your CSV file to be ingested. This will work for any web-hosted CSV file. For
                    example, you can create a file in Google Sheets following the format at this {link} and then
                    construct the CSV URL by publishing your Google Sheet in the CSV format.
                </>
            }
        />
    );
};
|
||||
@ -7,8 +7,9 @@ import { ANTD_GRAY } from '../../../entity/shared/constants';
|
||||
import { YamlEditor } from './YamlEditor';
|
||||
import RecipeForm from './RecipeForm/RecipeForm';
|
||||
import { SourceBuilderState, SourceConfig } from './types';
|
||||
import { LOOKER, LOOK_ML } from './constants';
|
||||
import { CSV, LOOKER, LOOK_ML } from './constants';
|
||||
import { LookerWarning } from './LookerWarning';
|
||||
import { CSVInfo } from './CSVInfo';
|
||||
|
||||
export const ControlsContainer = styled.div`
|
||||
display: flex;
|
||||
@ -81,6 +82,8 @@ function RecipeBuilder(props: Props) {
|
||||
return (
|
||||
<div>
|
||||
{(type === LOOKER || type === LOOK_ML) && <LookerWarning type={type} />}
|
||||
{type === CSV && <CSVInfo />}
|
||||
|
||||
<HeaderContainer>
|
||||
<Title style={{ marginBottom: 0 }} level={5}>
|
||||
{sourceConfigs?.displayName} Recipe
|
||||
|
||||
@ -83,7 +83,7 @@ import {
|
||||
PROJECT_NAME,
|
||||
} from './lookml';
|
||||
import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto';
|
||||
import { BIGQUERY_BETA, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants';
|
||||
import { BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants';
|
||||
import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta';
|
||||
import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql';
|
||||
import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql';
|
||||
@ -140,6 +140,7 @@ import {
|
||||
INCLUDE_VIEW_LINEAGE,
|
||||
INCLUDE_PROJECTIONS_LINEAGE,
|
||||
} from './vertica';
|
||||
import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILE_URL, CSV_WRITE_SEMANTICS } from './csv';
|
||||
|
||||
export enum RecipeSections {
|
||||
Connection = 0,
|
||||
@ -453,6 +454,11 @@ export const RECIPE_FIELDS: RecipeFields = {
|
||||
],
|
||||
filterSectionTooltip: 'Include or exclude specific Schemas, Tables, Views and Projections from ingestion.',
|
||||
},
|
||||
[CSV]: {
|
||||
fields: [CSV_FILE_URL],
|
||||
filterFields: [],
|
||||
advancedFields: [CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS],
|
||||
},
|
||||
};
|
||||
|
||||
export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS));
|
||||
|
||||
@ -0,0 +1,60 @@
|
||||
import { RecipeField, FieldType } from './common';
|
||||
|
||||
/**
 * Loose pattern for a web address: an optional http(s) scheme, a dotted host name, then at
 * least one further URL character. Compiled once here instead of on every validation call.
 */
const URL_PATTERN = /^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/;

/**
 * Builds a rule object whose validator accepts an empty value or a URL-shaped string.
 *
 * @param fieldName Human-readable field label interpolated into the rejection message.
 * @returns An object with a `validator(rule, value)` method. It resolves when `value` is
 * empty or matches the URL pattern, and otherwise rejects with an `Error`.
 */
const validateURL = (fieldName: string) => {
    return {
        validator(_: unknown, value?: string | null): Promise<void> {
            // An empty value passes this rule; only non-empty input is checked against the pattern.
            if (!value || URL_PATTERN.test(value)) {
                return Promise.resolve();
            }
            return Promise.reject(new Error(`A valid ${fieldName} is required.`));
        },
    };
};
|
||||
|
||||
/**
 * Required text field for the location of the CSV file to ingest.
 * The value is written to `source.config.filename` and checked by the URL validation rule.
 */
export const CSV_FILE_URL: RecipeField = {
    name: 'filename',
    label: 'File URL',
    tooltip: 'File URL of the CSV file to ingest.',
    type: FieldType.TEXT,
    fieldPath: 'source.config.filename',
    placeholder: 'File URL',
    required: true,
    // The rule is supplied as a factory that returns the validator object.
    rules: [() => validateURL('File URL')],
};
|
||||
|
||||
/**
 * Text field for the delimiter used inside array-valued columns (tags, terms and owners).
 * The value is written to `source.config.array_delimiter`; no validation rules are attached.
 */
export const CSV_ARRAY_DELIMITER: RecipeField = {
    name: 'array_delimiter',
    label: 'Array delimiter',
    tooltip: 'Delimiter to use when parsing array fields (tags, terms and owners)',
    type: FieldType.TEXT,
    fieldPath: 'source.config.array_delimiter',
    placeholder: 'Array delimiter',
    rules: null,
};
|
||||
|
||||
/**
 * Text field for the column delimiter of the CSV file.
 * The value is written to `source.config.delimiter`; no validation rules are attached.
 */
export const CSV_DELIMITER: RecipeField = {
    name: 'delimiter',
    label: 'Delimiter',
    tooltip: 'Delimiter to use when parsing CSV',
    type: FieldType.TEXT,
    fieldPath: 'source.config.delimiter',
    placeholder: 'Delimiter',
    rules: null,
};
|
||||
|
||||
/**
 * Select field offering the two write modes, PATCH and OVERRIDE.
 * The chosen value is written to `source.config.write_semantics`; no validation rules are attached.
 */
export const CSV_WRITE_SEMANTICS: RecipeField = {
    name: 'write_semantics',
    label: 'Write Semantics',
    tooltip:
        'Whether the new tags, terms and owners to be added will override the existing ones added only by this source or not. Value for this config can be "PATCH" or "OVERRIDE"',
    type: FieldType.SELECT,
    // Each option uses the same string for its label and its value.
    options: ['PATCH', 'OVERRIDE'].map((mode) => ({ label: mode, value: mode })),
    fieldPath: 'source.config.write_semantics',
    placeholder: 'Write Semantics',
    rules: null,
};
|
||||
@ -30,6 +30,7 @@ import verticaLogo from '../../../../images/verticalogo.png';
|
||||
import mlflowLogo from '../../../../images/mlflowlogo.png';
|
||||
import dynamodbLogo from '../../../../images/dynamodblogo.png';
|
||||
import fivetranLogo from '../../../../images/fivetranlogo.png';
|
||||
import csvLogo from '../../../../images/csv-logo.png';
|
||||
|
||||
export const ATHENA = 'athena';
|
||||
export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
|
||||
@ -108,6 +109,8 @@ export const VERTICA = 'vertica';
|
||||
export const VERTICA_URN = `urn:li:dataPlatform:${VERTICA}`;
|
||||
export const FIVETRAN = 'fivetran';
|
||||
export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`;
|
||||
export const CSV = 'csv-enricher';
|
||||
export const CSV_URN = `urn:li:dataPlatform:${CSV}`;
|
||||
|
||||
export const PLATFORM_URN_TO_LOGO = {
|
||||
[ATHENA_URN]: athenaLogo,
|
||||
@ -142,6 +145,7 @@ export const PLATFORM_URN_TO_LOGO = {
|
||||
[UNITY_CATALOG_URN]: databricksLogo,
|
||||
[VERTICA_URN]: verticaLogo,
|
||||
[FIVETRAN_URN]: fivetranLogo,
|
||||
[CSV_URN]: csvLogo,
|
||||
};
|
||||
|
||||
export const SOURCE_TO_PLATFORM_URN = {
|
||||
|
||||
@ -223,6 +223,13 @@
|
||||
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/",
|
||||
"recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV"
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:dataPlatform:csv-enricher",
|
||||
"name": "csv-enricher",
|
||||
"displayName": "CSV",
|
||||
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv",
|
||||
"recipe": "source: \n type: csv-enricher \n config: \n # URL of your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH"
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:dataPlatform:custom",
|
||||
"name": "custom",
|
||||
|
||||
22
datahub-web-react/src/app/ingest/source/conf/csv/csv.ts
Normal file
22
datahub-web-react/src/app/ingest/source/conf/csv/csv.ts
Normal file
@ -0,0 +1,22 @@
|
||||
import { SourceConfig } from '../types';
|
||||
import csvLogo from '../../../../../images/csv-logo.png';
|
||||
|
||||
/**
 * Starter YAML recipe for the `csv-enricher` source.
 *
 * Both delimiter values are quoted on purpose: an unquoted `|` is YAML's literal block-scalar
 * indicator and an unquoted leading `,` is a reserved indicator character, so neither would be
 * read as the one-character string it is meant to be.
 */
const placeholderRecipe = `\
source:
    type: csv-enricher
    config:
        filename: # URL of your csv file to ingest, e.g. https://docs.google.com/spreadsheets/d/DOCID/export?format=csv
        array_delimiter: '|'
        delimiter: ','
        write_semantics: PATCH
`;
|
||||
|
||||
/**
 * Source template entry for the CSV source: its `csv-enricher` type id, starter recipe,
 * display name, documentation link and logo.
 */
const csvConfig: SourceConfig = {
    type: 'csv-enricher',
    placeholderRecipe,
    displayName: 'CSV',
    docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv',
    logoUrl: csvLogo,
};
|
||||
|
||||
export default csvConfig;
|
||||
@ -16,6 +16,7 @@ import { SourceConfig } from './types';
|
||||
import hiveConfig from './hive/hive';
|
||||
import oracleConfig from './oracle/oracle';
|
||||
import tableauConfig from './tableau/tableau';
|
||||
import csvConfig from './csv/csv';
|
||||
|
||||
const baseUrl = window.location.origin;
|
||||
|
||||
@ -46,6 +47,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array<SourceConfig> = [
|
||||
glueConfig,
|
||||
oracleConfig,
|
||||
hiveConfig,
|
||||
csvConfig,
|
||||
{
|
||||
type: 'custom',
|
||||
placeholderRecipe: DEFAULT_PLACEHOLDER_RECIPE,
|
||||
|
||||
BIN
datahub-web-react/src/images/csv-logo.png
Normal file
BIN
datahub-web-react/src/images/csv-logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 12 KiB |
@ -574,5 +574,15 @@
|
||||
"type": "OTHERS",
|
||||
"logoUrl": "/assets/platforms/fivetranlogo.png"
|
||||
}
|
||||
},
|
||||
{
|
||||
"urn": "urn:li:dataPlatform:csv",
|
||||
"aspect": {
|
||||
"datasetNameDelimiter": ".",
|
||||
"name": "csv",
|
||||
"displayName": "CSV",
|
||||
"type": "OTHERS",
|
||||
"logoUrl": "/assets/platforms/csv-logo.png"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user