Merge pull request #4980 from changy1105/dygraph

[TIPC] Add js infer test
2025-11-07 05:13:29 +00:00 · 2021-12-23 14:05:46 +08:00 · 2021-12-23 14:05:46 +08:00 · 61b07f16d7
commit 61b07f16d7
parent b9111d3956 bbcabd8145
10 changed files with 392 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -29,3 +29,5 @@ paddleocr.egg-info/
 /deploy/android_demo/app/PaddleLite/
 /deploy/android_demo/app/.cxx/
 /deploy/android_demo/app/cache/
 test_tipc/web/models/
 test_tipc/web/node_modules/
--- a/test_tipc/docs/test_inference_js.md
+++ b/test_tipc/docs/test_inference_js.md
@ -0,0 +1,50 @@
 # Web 端基础预测功能测试
 Web 端主要基于 Jest-Puppeteer 完成 e2e 测试，其中 Puppeteer 操作 Chrome 完成推理流程，Jest 完成测试流程。
 >Puppeteer 是一个 Node 库，它提供了一个高级 API 来通过 DevTools 协议控制 Chromium 或 Chrome
 >Jest 是一个 JavaScript 测试框架，旨在确保任何 JavaScript 代码的正确性。
 #### 环境准备
 * 安装 Node（包含 npm ） （https://nodejs.org/zh-cn/download/）
 * 确认是否安装成功，在命令行执行
 ```sh
 # 显示所安装的 node 版本号，即表示成功安装
 node -v
 ```
 * 确认 npm 是否安装成功
 ```sh
 # npm 随着 node 一起安装，一般无需额外安装
 # 显示所安装的 npm 版本号，即表示成功安装
 npm -v
 ```
 #### 使用
 ```sh
 # web 测试环境准备
 bash test_tipc/prepare_js.sh 'js_infer'
 # web 推理测试
 bash test_tipc/test_inference_js.sh
 ```
 #### 流程设计
 ###### paddlejs prepare
 1. 判断 node, npm 是否安装
 2. 下载测试模型，当前检测模型是 ch_PP-OCRv2_det_infer ，识别模型是 ch_PP-OCRv2_rec_infer[1, 3, 32, 320]。如果需要替换模型，可直接将模型文件放在test_tipc/web/models/目录下。
  - 文本检测模型：https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
  - 文本识别模型：https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
  - 文本识别模型[1, 3, 32, 320]：https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
  - 保证较为准确的识别效果，需要将文本识别模型导出为输入shape是[1, 3, 32, 320]的静态模型
 3. 转换模型， inference.pdmodel inference.pdiparams 转换为 model.json chunk.dat（检测模型保存地址：test_tipc/web/models/ch_PP-OCRv2_det_infer，识别模型保存地址：test_tipc/web/models/ch_PP-OCRv2_rec_infer）
 4. 安装最新版本 ocr sdk  @paddlejs-models/ocr@latest
 5. 安装测试环境依赖 puppeteer、jest、jest-puppeteer，如果检查到已经安装，则不会进行二次安装
 ###### paddlejs infer test
 1. Jest 执行 server command：`python3 -m http.server 9811` 开启本地服务
 2. 启动 Jest 测试服务，通过 jest-puppeteer 插件完成 chrome 操作，加载 @paddlejs-models/ocr 脚本完成推理流程
 3. 测试用例为原图识别后的文本结果与预期文本结果（expect.json）进行对比，测试通过有两个标准：
    * 原图识别结果逐字符与预期结果对比，误差不超过 **10个字符**；
    * 原图识别结果每个文本框字符内容与预期结果进行相似度对比，相似度不小于 0.9（全部一致则相似度为1）。 
    只有满足上述两个标准，视为测试通过。通过为如下显示：
 <img width="600" src="https://user-images.githubusercontent.com/43414102/146406599-80b30c66-f2f8-4f57-a68a-007c479ff0f7.png">
--- a/test_tipc/prepare_js.sh
+++ b/test_tipc/prepare_js.sh
@ -0,0 +1,92 @@
 #!/bin/bash
 set -o errexit
 set -o nounset
 shopt -s extglob
 # Prepare the Paddle.js (web) OCR inference test environment.
 # Usage, from the repository root: bash test_tipc/prepare_js.sh js_infer
 #
 # Main steps:
 # 1. Check that node and npm are installed.
 # 2. Download the test models. The detection model is ch_PP-OCRv2_det_infer and the
 #    recognition model is ch_PP-OCRv2_rec_infer exported with input shape [1, 3, 32, 320].
 #    To use other models, put the model files under test_tipc/web/models/.
 #    NOTE(review): this script deletes test_tipc/web/models/ before downloading, so
 #    replacement models have to be put in place after it has run - confirm the intended workflow.
 #    - text detection model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
 #    - text recognition model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
 #    - text recognition model [1, 3, 32, 320]: https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
 #    - for reasonably accurate recognition, the recognition model has to be exported as a
 #      static model with input shape [1, 3, 32, 320]
 # 3. Convert each model (inference.pdmodel + inference.pdiparams) to the web format
 #    (model.json, chunk.dat), written into the model's own directory under
 #    test_tipc/web/models/.
 # 4. Install the latest ocr sdk, @paddlejs-models/ocr@latest.
 # 5. Install the test dependencies puppeteer, jest and jest-puppeteer unless they are
 #    already installed.
 # Print a message in red on stderr and abort with a non-zero status, so that callers
 # (CI included) can detect the failure.
 die() {
   echo -e "\033[31m $* \033[0m" >&2
   exit 1
 }
 # Convert one downloaded inference model to the Paddle.js web format in place.
 # Arguments: $1 - model directory containing inference.pdmodel / inference.pdiparams
 convert_model() {
   local model_dir=$1
   paddlejsconverter \
     --modelPath="${model_dir}/inference.pdmodel" \
     --paramPath="${model_dir}/inference.pdiparams" \
     --outputDir="${model_dir}/"
 }
 # check that node is installed
 command -v node >/dev/null 2>&1 || die "node 未安装"
 # check that npm is installed
 command -v npm >/dev/null 2>&1 || die "npm 未安装"
 # MODE must be 'js_infer'; default to empty so a missing argument gets the hint below
 # instead of an "unbound variable" error from nounset
 MODE=${1:-}
 # js_infer MODE , load model file and convert model to js_infer
 if [[ "${MODE}" != "js_infer" ]]; then
   echo "Please change mode to 'js_infer'" >&2
   exit 1
 fi
 # every path below is relative to the repository root
 web_dir=test_tipc/web
 [[ -d "${web_dir}" ]] || die "${web_dir} not found, run this script from the repository root"
 # saved_model_name
 det_saved_model_name=ch_PP-OCRv2_det_infer
 rec_saved_model_name=ch_PP-OCRv2_rec_infer
 # model_path
 model_path=test_tipc/web/models/
 rm -rf -- "${model_path:?}"
 echo "${model_path}${det_saved_model_name}"
 echo "${model_path}${rec_saved_model_name}"
 # download ocr_det inference model; extract with -C so the working directory never
 # changes and a tar failure aborts the script through errexit
 wget -nc -P "${model_path}" https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
 tar -xf "${model_path}${det_saved_model_name}.tar" -C "${model_path}"
 # download ocr_rec inference model
 wget -nc -P "${model_path}" https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
 tar -xf "${model_path}${rec_saved_model_name}.tar" -C "${model_path}"
 pwd
 pip3 install paddlejsconverter
 # convert inference model to web model: model.json、chunk.dat
 # (two separate invocations, one per model)
 convert_model "${model_path}${det_saved_model_name}"
 convert_model "${model_path}${rec_saved_model_name}"
 # always install latest ocr sdk
 cd "${web_dir}"
 echo -e "\033[33m Installing the latest ocr sdk... \033[0m"
 npm install @paddlejs-models/ocr@latest
 npm info @paddlejs-models/ocr
 echo -e "\033[32m The latest ocr sdk installed completely.!~ \033[0m"
 # install dependencies; each package is checked by its own directory so that
 # jest-puppeteer alone cannot pass for jest or puppeteer
 deps_installed=true
 for pkg in jest jest-puppeteer puppeteer; do
   if [[ ! -d "node_modules/${pkg}" ]]; then
     deps_installed=false
     break
   fi
 done
 if [[ "${deps_installed}" == true ]]; then
   echo -e "\033[32m Dependencies have installed \033[0m"
 else
   echo -e "\033[33m Installing dependencies ... \033[0m"
   npm install jest jest-puppeteer puppeteer
   echo -e "\033[32m Dependencies installed completely.!~ \033[0m"
 fi
 # del package-lock.json (-f: it may not exist, which must not fail the script)
 rm -f package-lock.json
--- a/test_tipc/test_inference_js.sh
+++ b/test_tipc/test_inference_js.sh
@ -0,0 +1,8 @@
 #!/bin/bash
 set -o errexit
 set -o nounset
 # Run the Paddle.js OCR end-to-end test with Jest.
 # Usage, from the repository root: bash test_tipc/test_inference_js.sh
 # Prerequisite: bash test_tipc/prepare_js.sh js_infer (installs jest into test_tipc/web).
 web_dir=test_tipc/web
 jest_bin=./node_modules/.bin/jest
 if [[ ! -d "${web_dir}" ]]; then
   echo "${web_dir} not found, run this script from the repository root" >&2
   exit 1
 fi
 cd "${web_dir}"
 # fail early with a hint instead of a bare "No such file or directory"
 if [[ ! -x "${jest_bin}" ]]; then
   echo "jest is not installed in ${web_dir}, run: bash test_tipc/prepare_js.sh js_infer" >&2
   exit 1
 fi
 # run ocr test in chrome
 "${jest_bin}" --config ./jest.config.js
--- a/test_tipc/web/expect.json
+++ b/test_tipc/web/expect.json
@ -0,0 +1,20 @@
 {
    "text":  [
        "纯臻营养护发素",
        "产品信息/参数",
        "（45元/每公斤，100公斤起订）",
        "每瓶22元，1000瓶起订）",
        "【品牌】：代加工方式/OEMODM",
        "【品名】：纯臻营养护发素",
        "【产品编号】：YM-X-3011",
        "ODMOEM",
        "【净含量】：220ml",
        "【适用人群】：适合所有肤质",
        "【主要成分】：鲸蜡硬脂醇、燕麦β-葡聚",
        "糖、椰油酰胺丙基甜菜碱、泛醌",
        "（成品包材）",
        "【主要功能】：可紧致头发磷层，从而达到",
        "即时持久改善头发光泽的效果，给干燥的头",
        "发足够的滋养"
    ]
 }
--- a/test_tipc/web/index.html
+++ b/test_tipc/web/index.html
@ -0,0 +1,13 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <meta http-equiv="X-UA-Compatible" content="ie=edge">
     <title>ocr test</title>
 </head>
 <body>
     <!-- Input image; index.test.js selects it with #ocr and passes it to ocr.recognize(). -->
     <img id="ocr" src="./test.jpg" alt="ocr test image" />
     <!-- OCR sdk bundle; kept inside <body> because a <script> after </body> is invalid markup. -->
     <script src="./node_modules/@paddlejs-models/ocr/lib/index.js"></script>
 </body>
 </html>
--- a/test_tipc/web/index.test.js
+++ b/test_tipc/web/index.test.js
@ -0,0 +1,82 @@
 const expectData = require('./expect.json');
 /**
  * Similarity of two strings derived from their Levenshtein edit distance.
  *
  * @param {string} left - first string
  * @param {string} right - second string
  * @returns {number} 1 - distance / max(length): 1 for identical strings, 0 when either
  *     argument is empty or missing
  */
 function similar(left, right) {
     if (!left || !right) {
         return 0;
     }
     const n = left.length;
     const m = right.length;
     // data[i][j] is the edit distance between the first i chars of left and the first j chars of right
     const data = [];
     for (let i = 0; i <= n; i++) {
         // first column: turning i chars into the empty string takes i deletions
         data[i] = [i];
     }
     for (let j = 0; j <= m; j++) {
         data[0][j] = j;
     }
     for (let i = 1; i <= n; i++) {
         const li = left.charAt(i - 1);
         for (let j = 1; j <= m; j++) {
             const cost = li === right.charAt(j - 1) ? 0 : 1;
             data[i][j] = Math.min(data[i - 1][j] + 1, data[i][j - 1] + 1, data[i - 1][j - 1] + cost);
         }
     }
     return 1 - data[n][m] / Math.max(n, m);
 }
 describe('e2e test ocr model', () => {
     beforeAll(async () => {
         await page.goto(PATH);
     });
     it('ocr infer and diff test', async () => {
         page.on('console', msg => console.log('PAGE LOG:', msg.text()));
         // Runs inside the page: initialise the ocr sdk with both models and recognize the #ocr image
         const text = await page.evaluate(async () => {
             const $ocr = document.querySelector('#ocr');
             const ocr = paddlejs['ocr'];
             await ocr.init('./models/ch_PP-OCRv2_det_infer', './models/ch_PP-OCRv2_rec_infer');
             const res = await ocr.recognize($ocr);
             return res.text;
         });
         // number of characters that differ between the recognized and the expected text
         let diffNum = 0;
         // accumulated per-text-box string similarity
         let similarity = 0;
         // maximum accepted number of differing characters
         const expectedDiffNum = 10;
         // minimum accepted average text-box similarity
         const expectedSimilarity = 0.9;
         // expected text content
         const expectResult = expectData.text;
         // an empty or missing reference would make the average below meaningless
         expect(Array.isArray(expectResult) && expectResult.length > 0).toBe(true);
         const recognized = Array.isArray(text) ? text : [];
         expectResult.forEach((item, index) => {
             // a missing text box counts as an empty string, so every expected char is a diff
             const word = recognized[index] || '';
             // character-by-character comparison
             for (let i = 0; i < item.length; i++) {
                 if (item[i] !== word[i]) {
                     console.log('expect: ', item[i], ' word: ', word[i]);
                     diffNum++;
                 }
             }
             // text-box string similarity
             similarity += similar(item, word);
         });
         similarity = similarity / expectResult.length;
         expect(diffNum).toBeLessThanOrEqual(expectedDiffNum);
         expect(similarity).toBeGreaterThanOrEqual(expectedSimilarity);
     });
 });
--- a/test_tipc/web/jest-puppeteer.config.js
+++ b/test_tipc/web/jest-puppeteer.config.js
@ -0,0 +1,14 @@
 // jest-puppeteer.config.js
 module.exports = {
    launch: {
        headless: false,
        product: 'chrome'
    },
    browserContext: 'default',
    server: {
        command: 'python3 -m http.server 9811',
        port: 9811,
        launchTimeout: 10000,
        debug: true
    }
 };
--- a/test_tipc/web/jest.config.js
+++ b/test_tipc/web/jest.config.js
@ -0,0 +1,111 @@
 // For a detailed explanation regarding each configuration property and type check, visit:
 // https://jestjs.io/docs/en/configuration.html
 module.exports = {
    preset: 'jest-puppeteer',
    // All imported modules in your tests should be mocked automatically
    // automock: false,
    // Automatically clear mock calls and instances between every test
    clearMocks: true,
    // An object that configures minimum threshold enforcement for coverage results
    // coverageThreshold: undefined,
    // A set of global variables that need to be available in all test environments
    globals: {
        PATH: 'http://localhost:9811'
    },
    // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
    // maxWorkers: "50%",
    // An array of directory names to be searched recursively up from the requiring module's location
    // moduleDirectories: [
    //   "node_modules"
    // ],
    // An array of file extensions your modules use
    moduleFileExtensions: [
        'js',
        'json',
        'jsx',
        'ts',
        'tsx',
        'node'
    ],
    // The root directory that Jest should scan for tests and modules within
    // rootDir: undefined,
    // A list of paths to directories that Jest should use to search for files in
    roots: [
        '<rootDir>'
    ],
    // Allows you to use a custom runner instead of Jest's default test runner
    // runner: "jest-runner",
    // The paths to modules that run some code to configure or set up the testing environment before each test
    // setupFiles: [],
    // A list of paths to modules that run some code to configure or set up the testing framework before each test
    // setupFilesAfterEnv: [],
    // The number of seconds after which a test is considered as slow and reported as such in the results.
    // slowTestThreshold: 5,
    // A list of paths to snapshot serializer modules Jest should use for snapshot testing
    // snapshotSerializers: [],
    // The test environment that will be used for testing
    // testEnvironment: 'jsdom',
    // Options that will be passed to the testEnvironment
    // testEnvironmentOptions: {},
    // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
    testPathIgnorePatterns: [
        '/node_modules/'
    ],
    // The regexp pattern or array of patterns that Jest uses to detect test files
    testRegex: '.(.+)\\.test\\.(js|ts)$',
    // This option allows the use of a custom results processor
    // testResultsProcessor: undefined,
    // This option allows use of a custom test runner
    // testRunner: "jest-circus/runner",
    // This option sets the URL for the jsdom environment. It is reflected in properties such as location.href
    testURL: 'http://localhost:9898/',
    // Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout"
    // timers: "real",
    // A map from regular expressions to paths to transformers
    transform: {
        '^.+\\.js$': 'babel-jest'
    },
    // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
    transformIgnorePatterns: [
        '/node_modules/',
        '\\.pnp\\.[^\\/]+$'
    ],
    // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
    // unmockedModulePathPatterns: undefined,
    // Indicates whether each individual test should be reported during the run
    verbose: true,
    // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
    // watchPathIgnorePatterns: [],
    // Whether to use watchman for file crawling
    // watchman: true,
    testTimeout: 50000
 };
--- a/test_tipc/web/test.jpg
+++ b/test_tipc/web/test.jpg