mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-05 23:23:42 +00:00
extracts high quality suggestions. refactors the implementation of getFieldSuggestions: abstracts the individual steps. creates a composable arrayReduce abstraction
This commit is contained in:
parent
e818092133
commit
b617fcca01
@ -1,7 +1,6 @@
|
|||||||
import Ember from 'ember';
|
import Ember from 'ember';
|
||||||
import DatasetTableRow from 'wherehows-web/components/dataset-table-row';
|
import DatasetTableRow from 'wherehows-web/components/dataset-table-row';
|
||||||
import {
|
import {
|
||||||
fieldIdentifierTypeValues,
|
|
||||||
fieldIdentifierTypeIds,
|
fieldIdentifierTypeIds,
|
||||||
defaultFieldDataTypeClassification,
|
defaultFieldDataTypeClassification,
|
||||||
isMixedId,
|
isMixedId,
|
||||||
@ -12,6 +11,11 @@ import {
|
|||||||
SuggestionIntent
|
SuggestionIntent
|
||||||
} from 'wherehows-web/constants';
|
} from 'wherehows-web/constants';
|
||||||
import { fieldChangeSetRequiresReview } from 'wherehows-web/utils/datasets/compliance-policy';
|
import { fieldChangeSetRequiresReview } from 'wherehows-web/utils/datasets/compliance-policy';
|
||||||
|
import { compact } from 'wherehows-web/utils/array';
|
||||||
|
import {
|
||||||
|
highConfidenceSuggestions,
|
||||||
|
accumulateFieldSuggestions
|
||||||
|
} from 'wherehows-web/utils/datasets/compliance-suggestions';
|
||||||
|
|
||||||
const { computed, get, getProperties } = Ember;
|
const { computed, get, getProperties } = Ember;
|
||||||
|
|
||||||
@ -21,24 +25,7 @@ const { computed, get, getProperties } = Ember;
|
|||||||
* @param {Array<Object>} predictions
|
* @param {Array<Object>} predictions
|
||||||
* @returns Array<Object>
|
* @returns Array<Object>
|
||||||
*/
|
*/
|
||||||
const getFieldSuggestions = predictions =>
|
const getFieldSuggestions = predictions => {
  // Discard falsy entries first, then keep only the predictions above the confidence threshold
  const presentPredictions = compact(predictions);
  const confidentPredictions = highConfidenceSuggestions(presentPredictions);

  // Fold the remaining predictions into a single suggestion object
  return accumulateFieldSuggestions(confidentPredictions);
};
|
||||||
predictions.filter(prediction => prediction).reduce((suggested, { value, confidence = 0 }) => {
|
|
||||||
if (value) {
|
|
||||||
if (fieldIdentifierTypeValues.includes(value)) {
|
|
||||||
suggested = { ...suggested, identifierType: value };
|
|
||||||
} else {
|
|
||||||
suggested = { ...suggested, logicalType: value };
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
...suggested,
|
|
||||||
// value is Percent. identifierType value should be the last element in the list
|
|
||||||
confidence: (confidence * 100).toFixed(2)
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return suggested;
|
|
||||||
}, {});
|
|
||||||
|
|
||||||
export default DatasetTableRow.extend({
|
export default DatasetTableRow.extend({
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -44,6 +44,12 @@ interface IFieldIdTypes {
|
|||||||
[prop: string]: IFieldIdProps;
|
[prop: string]: IFieldIdProps;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Confidence threshold, expressed as a fraction (0.5 is 50%), that a compliance policy suggestion must exceed to not be considered low quality
|
||||||
|
* @type {number}
|
||||||
|
*/
|
||||||
|
const lowQualitySuggestionConfidenceThreshold = 0.5;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A list of id logical types
|
* A list of id logical types
|
||||||
* @type {Array.<String>}
|
* @type {Array.<String>}
|
||||||
@ -344,5 +350,6 @@ export {
|
|||||||
logicalTypesForIds,
|
logicalTypesForIds,
|
||||||
logicalTypesForGeneric,
|
logicalTypesForGeneric,
|
||||||
getDefaultLogicalType,
|
getDefaultLogicalType,
|
||||||
SuggestionIntent
|
SuggestionIntent,
|
||||||
|
lowQualitySuggestionConfidenceThreshold
|
||||||
};
|
};
|
||||||
|
|||||||
@ -10,6 +10,19 @@ interface IPrediction {
|
|||||||
value: string;
|
value: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Describes the interface for a field suggestion
|
||||||
|
* values for the keys are extracted from the JSON response
|
||||||
|
* from the compliance/suggestion endpoint and do not necessarily match up
|
||||||
|
* with the key [`identifierTypePrediction` | `logicalTypePrediction`] predicted values
|
||||||
|
* @link extractTypesSuggestion gives further detail
|
||||||
|
*/
|
||||||
|
interface IFieldSuggestion {
|
||||||
|
identifierType?: string;
|
||||||
|
logicalType?: string;
|
||||||
|
confidence: number;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Describes shape of a compliance auto suggestion
|
* Describes shape of a compliance auto suggestion
|
||||||
*/
|
*/
|
||||||
|
|||||||
@ -14,6 +14,17 @@ const arrayMap = <T, U>(mappingFunction: (param: T) => U): ((array: Array<T>) =>
|
|||||||
const arrayFilter = <T>(filtrationFunction: (param: T) => boolean): ((array: Array<T>) => Array<T>) => (array = []) =>
|
const arrayFilter = <T>(filtrationFunction: (param: T) => boolean): ((array: Array<T>) => Array<T>) => (array = []) =>
|
||||||
array.filter(filtrationFunction);
|
array.filter(filtrationFunction);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Composable reducer abstraction, curries a reducing iteratee and returns a reducing function that takes a list
|
||||||
|
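* @param {(accumulator: U, element: T, index: number, collection: Array<T>) => U} iteratee the reducing function invoked for each element of the list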
|
||||||
|
* @param {U} init the initial value in the reduction sequence
|
||||||
|
* @return {(arr: Array<T>) => U}
|
||||||
|
*/
|
||||||
|
const arrayReduce = <T, U>(
|
||||||
|
iteratee: (accumulator: U, element: T, index: number, collection: Array<T>) => U,
|
||||||
|
init: U
|
||||||
|
): ((arr: Array<T>) => U) => (array = []) => array.reduce(iteratee, init);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Duplicate check using every to short-circuit iteration
|
* Duplicate check using every to short-circuit iteration
|
||||||
* @param {Array<T>} [list = []] list to check for dupes
|
* @param {Array<T>} [list = []] list to check for dupes
|
||||||
@ -21,4 +32,11 @@ const arrayFilter = <T>(filtrationFunction: (param: T) => boolean): ((array: Arr
|
|||||||
*/
|
*/
|
||||||
const isListUnique = <T>(list: Array<T> = []): boolean => new Set(list).size === list.length;
|
const isListUnique = <T>(list: Array<T> = []): boolean => new Set(list).size === list.length;
|
||||||
|
|
||||||
export { arrayMap, arrayFilter, isListUnique };
|
/**
|
||||||
|
* Returns a new list containing only the truthy (non-falsy) values from a list.
|
||||||
|
* @param {Array<T>} list the list of items to compact
|
||||||
|
* @return {Array<T>}
|
||||||
|
*/
|
||||||
|
const compact = <T>(list: Array<T> = []): Array<T> => {
  // Collect every truthy item, preserving the original order
  const truthyItems: Array<T> = [];

  for (const item of list) {
    if (item) {
      truthyItems.push(item);
    }
  }

  return truthyItems;
};
|
||||||
|
|
||||||
|
export { arrayMap, arrayFilter, arrayReduce, isListUnique, compact };
|
||||||
|
|||||||
49
wherehows-web/app/utils/datasets/compliance-suggestions.ts
Normal file
49
wherehows-web/app/utils/datasets/compliance-suggestions.ts
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
import { IFieldSuggestion, IPrediction } from 'wherehows-web/typings/api/datasets/compliance';
|
||||||
|
import { arrayFilter, arrayReduce } from 'wherehows-web/utils/array';
|
||||||
|
import { fieldIdentifierTypeValues, lowQualitySuggestionConfidenceThreshold } from 'wherehows-web/constants';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a single prediction and determines whether its confidence (0 when not supplied) is greater than
|
||||||
|
* a low confidence threshold
|
||||||
|
* @param {number} confidence
|
||||||
|
* @return {boolean}
|
||||||
|
*/
|
||||||
|
const isHighConfidenceSuggestion = ({ confidence = 0 }: IPrediction): boolean =>
|
||||||
|
confidence > lowQualitySuggestionConfidenceThreshold;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters a list of IPredictions, keeping only those that have a confidence level higher than the low confidence threshold
|
||||||
|
* @type {(array: Array<IPrediction>) => Array<IPrediction>}
|
||||||
|
*/
|
||||||
|
const highConfidenceSuggestions = arrayFilter(isHighConfidenceSuggestion);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts the type (identifierType&|logicalType) suggestion and confidence value from a predicted or suggested object.
|
||||||
|
* A determination is made based on the value string itself (whether it is one of fieldIdentifierTypeValues), rather than the keys of the wrapping object:
|
||||||
|
* `identifierTypePrediction` or `logicalTypePrediction`.
|
||||||
|
* This is important to pay attention to when modifying the implementation
|
||||||
|
* @param {IFieldSuggestion} suggestion the extracted suggestion
|
||||||
|
* @param {string} value the value in the api provided suggestion
|
||||||
|
* @param {number} confidence how confident the system is in the suggested value
|
||||||
|
* @return {IFieldSuggestion}
|
||||||
|
*/
|
||||||
|
const extractTypesSuggestion = (
|
||||||
|
suggestion: IFieldSuggestion,
|
||||||
|
{ value, confidence = 0 }: IPrediction
|
||||||
|
): IFieldSuggestion => {
|
||||||
|
if (value) {
|
||||||
|
if (fieldIdentifierTypeValues.includes(value)) {
|
||||||
|
suggestion = { ...suggestion, identifierType: value };
|
||||||
|
} else {
|
||||||
|
suggestion = { ...suggestion, logicalType: value };
|
||||||
|
}
|
||||||
|
|
||||||
|
// confidence is converted to a percentage and overwritten by each prediction, so the identifierType value should be the last element in the list
|
||||||
|
return { ...suggestion, confidence: +(confidence * 100).toFixed(2) };
|
||||||
|
}
|
||||||
|
return suggestion;
|
||||||
|
};
|
||||||
|
|
||||||
|
const accumulateFieldSuggestions = arrayReduce(extractTypesSuggestion, <IFieldSuggestion>{ confidence: 0 });
|
||||||
|
|
||||||
|
export { highConfidenceSuggestions, accumulateFieldSuggestions };
|
||||||
@ -1,5 +1,5 @@
|
|||||||
import { module, test } from 'qunit';
|
import { module, test } from 'qunit';
|
||||||
import { arrayMap, arrayFilter, isListUnique } from 'wherehows-web/utils/array';
|
import { arrayMap, arrayFilter, arrayReduce, isListUnique } from 'wherehows-web/utils/array';
|
||||||
import { xRandomNumbers, numToString, isAString } from 'wherehows-web/tests/helpers/arrays/functions';
|
import { xRandomNumbers, numToString, isAString } from 'wherehows-web/tests/helpers/arrays/functions';
|
||||||
|
|
||||||
module('Unit | Utility | array');
|
module('Unit | Utility | array');
|
||||||
@ -45,3 +45,17 @@ test('isListUnique correctly tests uniqueness of a list', function(assert) {
|
|||||||
assert.notOk(isListUnique(listWithDuplicateNumbers), `${listWithDuplicateNumbers} has duplicates`);
|
assert.notOk(isListUnique(listWithDuplicateNumbers), `${listWithDuplicateNumbers} has duplicates`);
|
||||||
assert.ok(isListUnique(listWithoutDuplicateNumbers), `${listWithoutDuplicateNumbers} has no duplicates`);
|
assert.ok(isListUnique(listWithoutDuplicateNumbers), `${listWithoutDuplicateNumbers} has no duplicates`);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('arrayReduce is a function', function(assert) {
|
||||||
|
assert.ok(typeof arrayReduce === 'function', 'module exports an array reducer function');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('arrayReduce should work as a reduction iteratee', function(assert) {
|
||||||
|
const array = [{ a: 1 }, { b: 2 }, { c: 3 }],
|
||||||
|
expected = { a: 1, b: 2, c: 3 };
|
||||||
|
const reducer = arrayReduce(function(acc, el) {
|
||||||
|
return { ...acc, ...el };
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
assert.deepEqual(reducer(array), expected);
|
||||||
|
});
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user