Merge pull request #12754 from strapi/translations-cleanup/interactive-duplicates-merge

feat: interactively merge duplicated translations
This commit is contained in:
Vincent 2022-03-14 10:20:42 +01:00 committed by GitHub
commit 221b894d0f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 381 additions and 93 deletions

View File

@ -1,93 +0,0 @@
'use strict';
const chalk = require('chalk');
const { merge } = require('lodash/fp');
const { readAllTranslationFiles } = require('./utils/translation-files');
// Prints every duplicated value with the package/key pairs that hold it,
// one group per value, followed by totals.
const printResults = results => {
  let valuesCount = 0;
  let keysCount = 0;
  for (const values of results) {
    for (const [value, pkgs] of Object.entries(values)) {
      for (const [packageName, keys] of Object.entries(pkgs)) {
        for (const key of keys) {
          console.log(`"${chalk.yellow(value)}" ${packageName} ${chalk.blue(key)}`);
          keysCount++;
        }
      }
      valuesCount++;
      // Blank line separates value groups
      console.log();
    }
  }
  console.log(`${valuesCount} duplicated values`);
  console.log(`${keysCount} keys can be merged`);
};
// Builds an updated duplicates object ({ packageName: Set<key> }) from the
// previously recorded duplicates and a newly found match.
const getDuplicatesObject = (prevDups = {}, { f1Key, f2Keys, f1PackageName, f2PackageName }) => {
  const previousForF1 = prevDups[f1PackageName] || [];
  const previousForF2 = prevDups[f2PackageName] || [];
  // Sets absorb keys that were already recorded for a package.
  const duplicates = {};
  duplicates[f1PackageName] = new Set([...previousForF1, f1Key]);
  // When both package names are identical this second assignment replaces
  // the first one — original evaluation order preserved on purpose.
  duplicates[f2PackageName] = new Set([...previousForF2, ...f2Keys]);
  return duplicates;
};
// Finds, for every value of file1, the keys of file2 carrying the same value.
// Returns an object keyed by translation value. In same-file mode an entry
// never matches its own key.
const findDuplicates = (file1, file2, { sameFile } = { sameFile: false }) => {
  const dupValues = {};
  for (const [f1Key, f1Value] of Object.entries(file1.fileContent)) {
    // Collect file2 keys holding the identical value
    const f2Keys = [];
    for (const [f2Key, f2Value] of Object.entries(file2.fileContent)) {
      if (f2Value === f1Value && (!sameFile || f1Key !== f2Key)) {
        f2Keys.push(f2Key);
      }
    }
    // Record the value only when at least one duplicate was found
    if (f2Keys.length > 0) {
      dupValues[f1Value] = getDuplicatesObject(dupValues[f1Value], {
        f1Key,
        f2Keys,
        f1PackageName: file1.packageName,
        f2PackageName: file2.packageName,
      });
    }
  }
  return dupValues;
};
// Detects duplicated translation values: inside core/admin, inside each
// plugin file, and between core/admin and every plugin.
// Returns an array: [crossPackagesDuplicates, ...perPluginDuplicates].
const findDuplicateTranslationValues = () => {
  const files = readAllTranslationFiles();
  // Separate core/admin file from plugin files
  const [coreFile] = files.splice(files.findIndex(file => file.packageName === 'core/admin'), 1);
  const pluginFiles = files;
  // Find duplicates inside every file separately
  const coreAdminDuplicates = findDuplicates(coreFile, coreFile, { sameFile: true });
  const pluginsDuplicates = pluginFiles.map(pluginFile =>
    findDuplicates(pluginFile, pluginFile, { sameFile: true })
  );
  // Find duplicates between core/admin and every plugin file
  // Merge the results with core/admin duplicates to avoid showing the same key twice
  // (in case core/admin contains duplicate values that also exists in a plugin)
  // NOTE(review): lodash `merge` replaces non-plain values such as Sets by
  // assignment instead of unioning them — verify no keys are lost when the
  // same value is duplicated across several packages.
  let crossPackagesDuplicates = coreAdminDuplicates;
  pluginFiles.forEach(file => {
    crossPackagesDuplicates = merge(crossPackagesDuplicates, findDuplicates(coreFile, file));
  });
  return [crossPackagesDuplicates, ...pluginsDuplicates];
};
// Entrypoint: detect every duplicated translation value and print the report.
printResults(findDuplicateTranslationValues());

View File

@ -0,0 +1,5 @@
'use strict';
// Keys that are allowed to contain duplicated values.
// The cleanup scripts skip these keys when detecting duplicates.
module.exports = ['plugin.name'];

View File

@ -0,0 +1,100 @@
'use strict';
const chalk = require('chalk');
const { merge } = require('lodash/fp');
const { readAllTranslationFiles } = require('../utils/translation-files');
const allowedKeys = require('./allowed-keys');
// Prints every duplicated value with the package/key pairs that hold it,
// followed by totals. `results` is shaped { value: { packageName: keys } }.
const printResults = results => {
  let valuesCount = 0;
  let keysCount = 0;
  for (const [value, pkgs] of Object.entries(results)) {
    for (const [packageName, keys] of Object.entries(pkgs)) {
      for (const key of keys) {
        console.log(`"${chalk.yellow(value)}" ${packageName} ${chalk.blue(key)}`);
        keysCount++;
      }
    }
    valuesCount++;
    // Blank line separates value groups
    console.log();
  }
  console.log(`${valuesCount} duplicated values`);
  console.log(`${keysCount} keys can be merged`);
};
// Builds an updated duplicates object ({ packageName: Set<key> }) from the
// previously recorded duplicates and a newly found match.
const getDuplicatesObject = (prevDups = {}, { f1Key, f2Keys, f1PackageName, f2PackageName }) => {
  const duplicates = {};
  // Sets absorb keys that were already recorded for a package.
  duplicates[f1PackageName] = new Set([...(prevDups[f1PackageName] || []), f1Key]);
  // When both package names are identical this second assignment replaces
  // the first one — original evaluation order preserved on purpose.
  duplicates[f2PackageName] = new Set([...(prevDups[f2PackageName] || []), ...f2Keys]);
  return duplicates;
};
// Finds, for every value of file1, the keys of file2 carrying the same value.
// Keys listed in `allowedKeys` are exempt from duplicate detection. In
// same-file mode an entry never matches its own key.
// Returns an object keyed by translation value.
const findDuplicates = (file1, file2, { sameFile } = { sameFile: false }) => {
  const dupValues = {};
  const isAllowed = key => allowedKeys.includes(key);
  for (const [f1Key, f1Value] of Object.entries(file1.fileContent)) {
    if (isAllowed(f1Key)) continue;
    // Collect file2 keys holding the identical value
    const f2Keys = [];
    for (const [f2Key, f2Value] of Object.entries(file2.fileContent)) {
      if (isAllowed(f2Key)) continue;
      if (f2Value !== f1Value) continue;
      if (sameFile && f1Key === f2Key) continue;
      f2Keys.push(f2Key);
    }
    // Record the value only when at least one duplicate was found
    if (f2Keys.length > 0) {
      dupValues[f1Value] = getDuplicatesObject(dupValues[f1Value], {
        f1Key,
        f2Keys,
        f1PackageName: file1.packageName,
        f2PackageName: file2.packageName,
      });
    }
  }
  return dupValues;
};
// Union-merges two duplicates objects shaped { value: { packageName: Set<key> } }.
// lodash `merge` is deliberately not used here: it overrides non-plain values
// such as Sets by assignment instead of merging them, which silently drops
// keys when the same translation value shows up in several packages.
// Does not mutate its inputs' nested objects (matching lodash/fp semantics).
const mergeDuplicates = (target, source) => {
  const result = { ...target };
  Object.entries(source).forEach(([value, pkgs]) => {
    const mergedPkgs = { ...(result[value] || {}) };
    Object.entries(pkgs).forEach(([packageName, keys]) => {
      mergedPkgs[packageName] = new Set([...(mergedPkgs[packageName] || []), ...keys]);
    });
    result[value] = mergedPkgs;
  });
  return result;
};
// Detects every duplicated translation value: inside core/admin, inside each
// plugin file, and between core/admin and every plugin file.
// Returns a single object shaped { value: { packageName: Set<key> } }.
const findDuplicatedTranslations = () => {
  const files = readAllTranslationFiles();
  // Separate core/admin file from plugin files
  const [coreFile] = files.splice(
    files.findIndex(file => file.packageName === 'core/admin'),
    1
  );
  const pluginFiles = files;
  // Find duplicates inside every file separately
  const coreAdminDuplicates = findDuplicates(coreFile, coreFile, { sameFile: true });
  let crossPackagesDuplicates = { ...coreAdminDuplicates };
  pluginFiles.forEach(pluginFile => {
    crossPackagesDuplicates = mergeDuplicates(
      crossPackagesDuplicates,
      findDuplicates(pluginFile, pluginFile, { sameFile: true })
    );
  });
  // Find duplicates between core/admin and every plugin file
  // Merge the results with core/admin duplicates to avoid showing the same key twice
  // (in case core/admin contains duplicate values that also exists in a plugin)
  pluginFiles.forEach(file => {
    crossPackagesDuplicates = mergeDuplicates(
      crossPackagesDuplicates,
      findDuplicates(coreFile, file)
    );
  });
  return crossPackagesDuplicates;
};
module.exports = { findDuplicatedTranslations, printResults };

View File

@ -0,0 +1,154 @@
/* eslint-disable node/no-extraneous-require */
'use strict';
const path = require('path');
const chalk = require('chalk');
const inquirer = require('inquirer');
const { kebabCase } = require('lodash');
const FilesContentSearch = require('../utils/search-files-content');
const { readAllTranslationFiles, writeAllTranslationFiles } = require('../utils/translation-files');
const { findDuplicatedTranslations } = require('./find-duplicated-translation');
// Search index over every .js file of the repository (node_modules, cache and
// build artifacts excluded); used to locate translation key usages in code.
const fcs = new FilesContentSearch(
  [path.join(__dirname, '../../../')],
  ['**/*.js'],
  ['**/node_modules/**', '**/cache/**', '**/build/**']
);
// Invokes fn(value, pkgs) for every entry of a duplicates object.
const mapDuplicates = async (duplicatesObject, fn) => {
  for (const [value, pkgs] of Object.entries(duplicatesObject)) {
    fn(value, pkgs);
  }
};
// Invokes fn(key, packageName) for every key of every package in pkgs.
const mapDuplicateValues = async (pkgs, fn) => {
  for (const [packageName, keys] of Object.entries(pkgs)) {
    for (const key of keys) {
      fn(key, packageName);
    }
  }
};
// Asks the user to confirm a merge; defaults to "no".
const promptShouldMerge = async () => {
  const { shouldMerge } = await inquirer.prompt({
    type: 'confirm',
    message: 'Should merge?',
    name: 'shouldMerge',
    default: false,
  });
  return shouldMerge;
};
// Asks the user for the merged key name, suggesting a kebab-cased
// "global." key derived from the duplicated value.
const promptTargetKey = async valueGroup => {
  const { targetKey } = await inquirer.prompt({
    type: 'input',
    name: 'targetKey',
    message: 'Target key name:',
    default: `global.${kebabCase(valueGroup[0].value)}`,
  });
  return targetKey;
};
// Prints the duplicated value and a table of the keys holding it,
// with their package and usage count.
const printToMerge = valueGroup => {
  const [{ value }] = valueGroup;
  console.log(`Value: "${chalk.yellow(value)}"`);
  const rows = valueGroup.map(({ key, packageName, resultsCount }) => ({
    key,
    package: packageName,
    usageCount: resultsCount,
  }));
  console.table(rows);
};
// Keeps only the search results whose path belongs to the given package.
// core/admin is unscoped: its results are kept wherever they were found.
const applyPackageScope = (packageName, searchResults) => {
  const belongsToPackage = result =>
    packageName === 'core/admin' || result.path.includes(packageName);
  return searchResults.filter(belongsToPackage);
};
// Filters out duplicated translations that are not in use: for every value,
// keeps only keys with at least one usage, then drops groups where fewer
// than two used keys remain (nothing left to merge).
const getValuesToMerge = keyUsage => {
  const usedDups = keyUsage.map(({ dups }) => dups.filter(({ resultsCount }) => resultsCount > 0));
  return usedDups.filter(group => group.length > 1);
};
// Returns an array of duplicated translations that are in use in the codebase
// (found in at least one .js file). Each entry is { value, dups } where every
// dup carries its usage count and a replaceAll() helper to rewrite usages.
const getKeysUsage = duplicatesObject => {
  const keyUsage = [];
  mapDuplicates(duplicatesObject, (value, pkgs) => {
    const dups = [];
    mapDuplicateValues(pkgs, (key, packageName) => {
      // Look for both plain ids and getTrad()-wrapped ids, scoped to the package
      const searchResults = applyPackageScope(packageName, [
        ...fcs.searchString(`id: '${key}'`),
        ...fcs.searchString(`id: getTrad('${key}')`),
      ]);
      const resultsCount = searchResults.reduce((total, result) => total + result.matches.length, 0);
      dups.push({
        key,
        value,
        packageName,
        resultsCount,
        // Rewrites every matched usage of this key in the found files
        replaceAll: replaceValue => {
          searchResults.forEach(result => result.replaceAll(replaceValue));
        },
      });
    });
    keyUsage.push({ value, dups });
  });
  return keyUsage;
};
// Handles the merging in translation files:
// removes the duplicated translation from its package and writes the new
// shared key into the core/admin en.json file.
const updateTranslationFiles = (keyGroup, targetKey) => {
  const translationFiles = {};
  for (const file of readAllTranslationFiles()) {
    translationFiles[file.packageName] = file;
  }
  const sourceFile = translationFiles[keyGroup.packageName];
  // Only delete the key when it still holds the duplicated value
  if (sourceFile.fileContent[keyGroup.key] === keyGroup.value) {
    delete sourceFile.fileContent[keyGroup.key];
  }
  translationFiles['core/admin'].fileContent[targetKey] = keyGroup.value;
  writeAllTranslationFiles(Object.values(translationFiles));
};
// Displays and prompts for every detected duplication group,
// performing the merge for each group the user confirms.
const merge = async valuesToMerge => {
  let mergedCount = 0;
  for (const [index, valueGroup] of valuesToMerge.entries()) {
    // Display the group with a progress indicator
    console.clear();
    console.log(`${index + 1}/${valuesToMerge.length}`);
    printToMerge(valueGroup);
    // Prompt, then merge on confirmation
    const shouldMerge = await promptShouldMerge();
    if (shouldMerge) {
      const targetKey = await promptTargetKey(valueGroup);
      for (const keyGroup of valueGroup) {
        updateTranslationFiles(keyGroup, targetKey);
        keyGroup.replaceAll(`id: '${targetKey}'`);
        mergedCount++;
      }
    }
  }
  console.log(`Merged ${mergedCount} keys`);
};
// Script entry point: index the codebase, detect duplicated translations,
// keep only the ones actually used, then run the interactive merge.
const run = async () => {
  await fcs.loadFiles();
  const duplicates = findDuplicatedTranslations();
  const keyUsage = getKeysUsage(duplicates);
  const valuesToMerge = getValuesToMerge(keyUsage);
  await merge(valuesToMerge);
};
run();

View File

@ -0,0 +1,112 @@
'use strict';
const { promisify } = require('util');
const path = require('path');
const readFile = promisify(require('fs').readFile);
const { readFileSync, writeFileSync } = require('fs');
const _ = require('lodash');
const glob = promisify(require('glob').glob);
const chalk = require('chalk');
// Resolves every file matching one of matchPatterns under one of the
// directories, excluding ignorePatterns. Globs run sequentially so the
// result ordering stays directory-major, pattern-minor.
const findFilesInDirectories = async (
  directories = [],
  matchPatterns = [],
  ignorePatterns = []
) => {
  const files = [];
  for (const directory of directories) {
    for (const pattern of matchPatterns) {
      const matched = await glob(path.join(directory, pattern), { ignore: ignorePatterns });
      files.push(...matched);
    }
  }
  return files;
};
// Reads every file in parallel and returns { path, content } records.
const loadFilesInMemory = files =>
  Promise.all(
    files.map(async filePath => {
      const buffer = await readFile(filePath);
      return { path: filePath, content: buffer.toString() };
    })
  );
// Returns every line of `content` containing `matchedString`, trimmed,
// with each occurrence of the match highlighted for terminal display.
const getMatches = (content, matchedString) =>
  content
    .split('\n')
    .filter(line => line.includes(matchedString))
    .map(line =>
      line
        .split(matchedString)
        .join(chalk.bgMagentaBright(matchedString))
        .trim()
    );
/**
 * In-memory content search over a set of files selected by glob patterns.
 * Call `loadFiles()` once before using `search()` / `searchString()`.
 */
class FilesContentSearch {
  /**
   * @param {string[]} directories - Root directories to scan.
   * @param {string[]} matchPatterns - Glob patterns of files to include.
   * @param {string[]} ignorePatterns - Glob patterns of files to exclude.
   */
  constructor(directories, matchPatterns, ignorePatterns) {
    this.directories = directories;
    this.matchPatterns = matchPatterns;
    this.ignorePatterns = ignorePatterns;
  }

  // Resolves the file list and caches every file's content in memory.
  async loadFiles() {
    console.log('Searching for matching files');
    this.fileList = await findFilesInDirectories(
      this.directories,
      this.matchPatterns,
      this.ignorePatterns
    );
    console.log(`Found ${this.fileList.length} files`);
    console.log('Loading files content in memory');
    this.files = await loadFilesInMemory(this.fileList);
    console.log(`Loaded ${this.files.length} files in memory`);
  }

  /**
   * Runs `matchFunction` against every loaded file's content.
   * @param {(content: string) => string|null} matchFunction - Returns the
   *   matched string on a hit, or a falsy value on a miss.
   * @returns {Array} matching file records augmented with `matches`
   *   (highlighted lines) and a `replaceAll(replaceValue)` helper that
   *   rewrites the file on disk.
   */
  search(matchFunction) {
    const results = [];
    // A shallow per-file copy is enough here: `path` and `content` are
    // immutable strings. The previous _.cloneDeep deep-copied every file's
    // content on each search, which is needlessly expensive on large trees.
    for (const file of this.files.map(original => ({ ...original }))) {
      const matchedString = matchFunction(file.content);
      if (matchedString) {
        file.matches = getMatches(file.content, matchedString);
        file.replaceAll = replaceValue => {
          // Re-read from disk so earlier replacements are not overwritten
          let fileContent = readFileSync(file.path, { encoding: 'utf-8' });
          fileContent = fileContent.replaceAll(matchedString, replaceValue);
          writeFileSync(file.path, fileContent, { encoding: 'utf-8' });
        };
        results.push(file);
      }
    }
    return results;
  }

  // Convenience wrapper: exact substring search.
  searchString(string) {
    return this.search(fileContent => {
      if (fileContent.includes(string)) return string;
      return null;
    });
  }
}
module.exports = FilesContentSearch;
// Usage example
// const fcs = new FilesContentSearch(
// [path.join(__dirname, '../../../')],
// ['**/*.js'],
// ['**/node_modules/**', '**/cache/**', '**/build/**']
// );
// await fcs.loadFiles();
// const results = fcs.searchString('text to find');

View File

@ -17,6 +17,10 @@ const readTranslationFile = filePath => ({
fileContent: JSON.parse(fs.readFileSync(filePath).toString('utf-8')),
});
// Serializes a translation file record back to disk as pretty-printed JSON
// with a trailing newline.
const writeTranslationFile = file => {
  const serialized = `${JSON.stringify(file.fileContent, null, 2)}\n`;
  fs.writeFileSync(file.filePath, serialized);
};
const readAllTranslationFiles = () => {
const translationFilesPaths = [
...glob.sync(path.join(PACKAGES_DIR_PATH, 'core/*/', TRANSLATION_FILE_PATH)),
@ -26,7 +30,13 @@ const readAllTranslationFiles = () => {
return translationFilesPaths.map(readTranslationFile);
};
// Persists every given translation file record to disk.
const writeAllTranslationFiles = files => {
  for (const file of files) {
    writeTranslationFile(file);
  }
};
// Public helpers for reading and writing the packages' translation files.
module.exports = {
  readTranslationFile,
  writeTranslationFile,
  readAllTranslationFiles,
  writeAllTranslationFiles,
};