feat(har): allow storing content as separate files (#14934)

This commit is contained in:
Pavel Feldman 2022-06-16 15:33:32 -08:00 committed by GitHub
parent 765ac5f0a7
commit 245c33a5d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 207 additions and 46 deletions

View File

@ -591,8 +591,9 @@ Logger sink for Playwright logging.
* langs: js
- `recordHar` <[Object]>
- `omitContent` ?<[boolean]> Optional setting to control whether to omit request content from the HAR. Defaults to
`false`.
- `path` <[path]> Path on the filesystem to write the HAR file to.
`false`. Deprecated, use `content` policy instead.
- `content` ?<[HarContentPolicy]<"omit"|"embed"|"attach">> Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. Defaults to `embed`, which stores content inline in the HAR file as per the HAR specification.
- `path` <[path]> Path on the filesystem to write the HAR file to. If the file name ends with `.zip`, the har file is archived. Content `attach` will also enforce `zip` compression.
- `urlFilter` ?<[string]|[RegExp]> A glob or regex pattern to filter requests that are stored in the HAR. When a [`option: baseURL`] via the context options was provided and the passed URL is a path, it gets merged via the [`new URL()`](https://developer.mozilla.org/en-US/docs/Web/API/URL/URL) constructor.
Enables [HAR](http://www.softwareishard.com/blog/har-12-spec) recording for all pages into `recordHar.path` file. If not

View File

@ -383,7 +383,7 @@ function prepareRecordHarOptions(options: BrowserContextOptions['recordHar']): c
return;
return {
path: options.path,
omitContent: options.omitContent,
content: options.content || (options.omitContent ? 'omit' : 'embed'),
urlGlob: isString(options.urlFilter) ? options.urlFilter : undefined,
urlRegexSource: isRegExp(options.urlFilter) ? options.urlFilter.source : undefined,
urlRegexFlags: isRegExp(options.urlFilter) ? options.urlFilter.flags : undefined,

View File

@ -20,8 +20,7 @@ import type * as structs from '../../types/structs';
import type * as api from '../../types/types';
import type * as channels from '../protocol/channels';
import { TimeoutSettings } from '../common/timeoutSettings';
import { headersObjectToArray } from '../utils';
import { BrowserContext } from './browserContext';
import { BrowserContext, prepareBrowserContextParams } from './browserContext';
import { ChannelOwner } from './channelOwner';
import { envObjectToArray } from './clientHelper';
import { Events } from './events';
@ -31,10 +30,11 @@ import type { Env, WaitForEventOptions, Headers, BrowserContextOptions } from '.
import { Waiter } from './waiter';
import { HarRouter } from './harRouter';
type ElectronOptions = Omit<channels.ElectronLaunchOptions, 'env'|'extraHTTPHeaders'> & {
type ElectronOptions = Omit<channels.ElectronLaunchOptions, 'env'|'extraHTTPHeaders'|'recordHar'> & {
env?: Env,
extraHTTPHeaders?: Headers,
har?: BrowserContextOptions['har']
har?: BrowserContextOptions['har'],
recordHar?: BrowserContextOptions['recordHar'],
};
type ElectronAppType = typeof import('electron');
@ -50,8 +50,7 @@ export class Electron extends ChannelOwner<channels.ElectronChannel> implements
async launch(options: ElectronOptions = {}): Promise<ElectronApplication> {
const params: channels.ElectronLaunchParams = {
...options,
extraHTTPHeaders: options.extraHTTPHeaders && headersObjectToArray(options.extraHTTPHeaders),
...await prepareBrowserContextParams(options),
env: envObjectToArray(options.env ? options.env : process.env),
};
const harRouter = options.har ? await HarRouter.create(options.har) : null;

View File

@ -62,6 +62,7 @@ export type BrowserContextOptions = Omit<channels.BrowserNewContextOptions, 'vie
recordHar?: {
path: string,
omitContent?: boolean,
content?: 'omit' | 'embed' | 'attach',
urlFilter?: string | RegExp,
},
};

View File

@ -264,8 +264,8 @@ export type SerializedError = {
};
export type RecordHarOptions = {
omitContent?: boolean,
path: string,
content: 'embed' | 'attach' | 'omit',
urlGlob?: string,
urlRegexSource?: string,
urlRegexFlags?: string,

View File

@ -224,8 +224,13 @@ SerializedError:
RecordHarOptions:
type: object
properties:
omitContent: boolean?
path: string
content:
type: enum
literals:
- embed
- attach
- omit
urlGlob: string?
urlRegexSource: string?
urlRegexFlags: string?

View File

@ -154,8 +154,8 @@ export function createScheme(tChannel: (name: string) => Validator): Scheme {
value: tOptional(tType('SerializedValue')),
});
scheme.RecordHarOptions = tObject({
omitContent: tOptional(tBoolean),
path: tString,
content: tEnum(['embed', 'attach', 'omit']),
urlGlob: tOptional(tString),
urlRegexSource: tOptional(tString),
urlRegexFlags: tOptional(tString),

View File

@ -17,7 +17,7 @@
import * as os from 'os';
import { TimeoutSettings } from '../common/timeoutSettings';
import { debugMode, createGuid } from '../utils';
import { debugMode } from '../utils';
import { mkdirIfNeeded } from '../utils/fileUtils';
import type { Browser, BrowserOptions } from './browser';
import type { Download } from './download';
@ -87,7 +87,7 @@ export abstract class BrowserContext extends SdkObject {
this.fetchRequest = new BrowserContextAPIRequestContext(this);
if (this._options.recordHar)
this._harRecorder = new HarRecorder(this, { ...this._options.recordHar, path: path.join(this._browser.options.artifactsDir, `${createGuid()}.har`) });
this._harRecorder = new HarRecorder(this, this._options.recordHar);
this.tracing = new Tracing(this, browser.options.tracesDir);
}

View File

@ -15,30 +15,35 @@
*/
import fs from 'fs';
import type { APIRequestContext } from '../fetch';
import path from 'path';
import { Artifact } from '../artifact';
import type { BrowserContext } from '../browserContext';
import type * as har from './har';
import { HarTracer } from './harTracer';
import type * as channels from '../../protocol/channels';
import { yazl } from '../../zipBundle';
import type { ZipFile } from '../../zipBundle';
import { ManualPromise } from '../../utils/manualPromise';
import type EventEmitter from 'events';
import { createGuid } from '../../utils';
export class HarRecorder {
private _artifact: Artifact;
private _isFlushed: boolean = false;
private _options: channels.RecordHarOptions;
private _tracer: HarTracer;
private _entries: har.Entry[] = [];
private _zipFile: ZipFile | null = null;
constructor(context: BrowserContext | APIRequestContext, options: channels.RecordHarOptions) {
this._artifact = new Artifact(context, options.path);
this._options = options;
constructor(context: BrowserContext, options: channels.RecordHarOptions) {
this._artifact = new Artifact(context, path.join(context._browser.options.artifactsDir, `${createGuid()}.har`));
const urlFilterRe = options.urlRegexSource !== undefined && options.urlRegexFlags !== undefined ? new RegExp(options.urlRegexSource, options.urlRegexFlags) : undefined;
this._tracer = new HarTracer(context, this, {
content: options.omitContent ? 'omit' : 'embedded',
content: options.content || 'embed',
waitForContentOnStop: true,
skipScripts: false,
urlFilter: urlFilterRe ?? options.urlGlob,
});
this._zipFile = options.content === 'attach' || options.path.endsWith('.zip') ? new yazl.ZipFile() : null;
this._tracer.start();
}
@ -50,6 +55,8 @@ export class HarRecorder {
}
/**
 * Receives a content blob together with the name (`sha1`) it was reported
 * under. When this recorder is producing a zip archive, the buffer is added
 * to the archive under that name; otherwise the blob is ignored.
 *
 * NOTE(review): nothing here de-duplicates by `sha1`, so a blob reported
 * twice is added twice — confirm whether duplicate archive entries are acceptable.
 */
onContentBlob(sha1: string, buffer: Buffer) {
  const archive = this._zipFile;
  if (!archive)
    return;
  archive.addBuffer(buffer, sha1);
}
async flush() {
@ -57,9 +64,24 @@ export class HarRecorder {
return;
this._isFlushed = true;
await this._tracer.flush();
const log = this._tracer.stop();
log.entries = this._entries;
await fs.promises.writeFile(this._options.path, JSON.stringify({ log }, undefined, 2));
const harFileContent = JSON.stringify({ log }, undefined, 2);
if (this._zipFile) {
const result = new ManualPromise<void>();
(this._zipFile as unknown as EventEmitter).on('error', error => result.reject(error));
this._zipFile.addBuffer(Buffer.from(harFileContent, 'utf-8'), 'har.har');
this._zipFile.end();
this._zipFile.outputStream.pipe(fs.createWriteStream(this._artifact.localPath())).on('close', () => {
result.resolve();
});
await result;
} else {
await fs.promises.writeFile(this._artifact.localPath(), harFileContent);
}
}
async export(): Promise<Artifact> {

View File

@ -40,7 +40,7 @@ export interface HarTracerDelegate {
}
type HarTracerOptions = {
content: 'omit' | 'sha1' | 'embedded';
content: 'omit' | 'attach' | 'embed';
skipScripts: boolean;
waitForContentOnStop: boolean;
urlFilter?: string | RegExp;
@ -272,7 +272,7 @@ export class HarTracer {
compressionCalculationBarrier.setDecodedBodySize(0);
}).then(() => {
const postData = response.request().postDataBuffer();
if (postData && harEntry.request.postData && this._options.content === 'sha1') {
if (postData && harEntry.request.postData && this._options.content === 'attach') {
harEntry.request.postData._sha1 = calculateSha1(postData) + '.' + (mime.getExtension(harEntry.request.postData.mimeType) || 'dat');
if (this._started)
this._delegate.onContentBlob(harEntry.request.postData._sha1, postData);
@ -308,7 +308,7 @@ export class HarTracer {
return;
}
content.size = buffer.length;
if (this._options.content === 'embedded') {
if (this._options.content === 'embed') {
// Sometimes, we can receive a font/media file with textual mime type. Browser
// still interprets them correctly, but the 'content-type' header is obviously wrong.
if (isTextualMimeType(content.mimeType) && resourceType !== 'font') {
@ -317,7 +317,7 @@ export class HarTracer {
content.text = buffer.toString('base64');
content.encoding = 'base64';
}
} else if (this._options.content === 'sha1') {
} else if (this._options.content === 'attach') {
content._sha1 = calculateSha1(buffer) + '.' + (mime.getExtension(content.mimeType) || 'dat');
if (this._started)
this._delegate.onContentBlob(content._sha1, buffer);
@ -475,7 +475,7 @@ function createHarEntry(method: string, url: URL, requestref: string, frameref:
return harEntry;
}
function postDataForRequest(request: network.Request, content: 'omit' | 'sha1' | 'embedded'): har.PostData | undefined {
function postDataForRequest(request: network.Request, content: 'omit' | 'attach' | 'embed'): har.PostData | undefined {
const postData = request.postDataBuffer();
if (!postData)
return;
@ -484,7 +484,7 @@ function postDataForRequest(request: network.Request, content: 'omit' | 'sha1' |
return postDataForBuffer(postData, contentType, content);
}
function postDataForBuffer(postData: Buffer | null, contentType: string | undefined, content: 'omit' | 'sha1' | 'embedded'): har.PostData | undefined {
function postDataForBuffer(postData: Buffer | null, contentType: string | undefined, content: 'omit' | 'attach' | 'embed'): har.PostData | undefined {
if (!postData)
return;
@ -496,7 +496,7 @@ function postDataForBuffer(postData: Buffer | null, contentType: string | undefi
params: []
};
if (content === 'embedded' && contentType !== 'application/octet-stream')
if (content === 'embed' && contentType !== 'application/octet-stream')
result.text = postData.toString();
if (contentType === 'application/x-www-form-urlencoded') {

View File

@ -89,7 +89,7 @@ export class Tracing extends SdkObject implements InstrumentationListener, Snaps
this._context = context;
this._precreatedTracesDir = tracesDir;
this._harTracer = new HarTracer(context, this, {
content: 'sha1',
content: 'attach',
waitForContentOnStop: false,
skipScripts: true,
});

View File

@ -34,7 +34,7 @@ export class InMemorySnapshotter extends BaseSnapshotStorage implements Snapshot
constructor(context: BrowserContext) {
super();
this._snapshotter = new Snapshotter(context, this);
this._harTracer = new HarTracer(context, this, { content: 'sha1', waitForContentOnStop: false, skipScripts: true });
this._harTracer = new HarTracer(context, this, { content: 'attach', waitForContentOnStop: false, skipScripts: true });
}
async initialize(): Promise<void> {

View File

@ -10638,12 +10638,21 @@ export interface BrowserType<Unused = {}> {
*/
recordHar?: {
/**
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`.
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`. Deprecated, use `content`
* policy instead.
*/
omitContent?: boolean;
/**
* Path on the filesystem to write the HAR file to.
* Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach`
* is specified, resources are persisted as separate files and all of these files are archived along with the HAR file.
* Defaults to `embed`, which stores content inline in the HAR file as per the HAR specification.
*/
content?: "omit"|"embed"|"attach";
/**
* Path on the filesystem to write the HAR file to. If the file name ends with `.zip`, the har file is archived. Content
* `attach` will also enforce `zip` compression.
*/
path: string;
@ -11846,12 +11855,21 @@ export interface AndroidDevice {
*/
recordHar?: {
/**
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`.
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`. Deprecated, use `content`
* policy instead.
*/
omitContent?: boolean;
/**
* Path on the filesystem to write the HAR file to.
* Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach`
* is specified, resources are persisted as separate files and all of these files are archived along with the HAR file.
* Defaults to `embed`, which stores content inline in the HAR file as per the HAR specification.
*/
content?: "omit"|"embed"|"attach";
/**
* Path on the filesystem to write the HAR file to. If the file name ends with `.zip`, the har file is archived. Content
* `attach` will also enforce `zip` compression.
*/
path: string;
@ -13437,12 +13455,21 @@ export interface Browser extends EventEmitter {
*/
recordHar?: {
/**
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`.
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`. Deprecated, use `content`
* policy instead.
*/
omitContent?: boolean;
/**
* Path on the filesystem to write the HAR file to.
* Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach`
* is specified, resources are persisted as separate files and all of these files are archived along with the HAR file.
* Defaults to `embed`, which stores content inline in the HAR file as per the HAR specification.
*/
content?: "omit"|"embed"|"attach";
/**
* Path on the filesystem to write the HAR file to. If the file name ends with `.zip`, the har file is archived. Content
* `attach` will also enforce `zip` compression.
*/
path: string;
@ -14244,12 +14271,21 @@ export interface Electron {
*/
recordHar?: {
/**
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`.
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`. Deprecated, use `content`
* policy instead.
*/
omitContent?: boolean;
/**
* Path on the filesystem to write the HAR file to.
* Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach`
* is specified, resources are persisted as separate files and all of these files are archived along with the HAR file.
* Defaults to `embed`, which stores content inline in the HAR file as per the HAR specification.
*/
content?: "omit"|"embed"|"attach";
/**
* Path on the filesystem to write the HAR file to. If the file name ends with `.zip`, the har file is archived. Content
* `attach` will also enforce `zip` compression.
*/
path: string;
@ -16084,12 +16120,21 @@ export interface BrowserContextOptions {
*/
recordHar?: {
/**
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`.
* Optional setting to control whether to omit request content from the HAR. Defaults to `false`. Deprecated, use `content`
* policy instead.
*/
omitContent?: boolean;
/**
* Path on the filesystem to write the HAR file to.
* Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach`
* is specified, resources are persisted as separate files and all of these files are archived along with the HAR file.
* Defaults to `embed`, which stores content inline in the HAR file as per the HAR specification.
*/
content?: "omit"|"embed"|"attach";
/**
* Path on the filesystem to write the HAR file to. If the file name ends with `.zip`, the har file is archived. Content
* `attach` will also enforce `zip` compression.
*/
path: string;

View File

@ -111,3 +111,12 @@ export async function parseTrace(file: string): Promise<{ events: any[], resourc
resources,
};
}
/**
 * Reads every entry of the zip archive at `file` into memory.
 *
 * @param file Path of the zip archive to read.
 * @returns A map from entry name to that entry's contents.
 */
export async function parseHar(file: string): Promise<Map<string, Buffer>> {
  const zipFS = new ZipFileSystem(file);
  const resources = new Map<string, Buffer>();
  try {
    for (const entry of await zipFS.entries())
      resources.set(entry, await zipFS.read(entry));
  } finally {
    // Release the archive even when listing or reading an entry fails,
    // so a broken archive does not leave the file handle open.
    zipFS.close();
  }
  return resources;
}

View File

@ -22,10 +22,11 @@ import http2 from 'http2';
import type { BrowserContext, BrowserContextOptions } from 'playwright-core';
import type { AddressInfo } from 'net';
import type { Log } from '../../packages/playwright-core/src/server/har/har';
import { parseHar } from '../config/utils';
async function pageWithHar(contextFactory: (options?: BrowserContextOptions) => Promise<BrowserContext>, testInfo: any, outputPath: string = 'test.har') {
const harPath = testInfo.outputPath(outputPath);
const context = await contextFactory({ recordHar: { path: harPath }, ignoreHTTPSErrors: true });
async function pageWithHar(contextFactory: (options?: BrowserContextOptions) => Promise<BrowserContext>, testInfo: any, options: { outputPath?: string, content?: 'embed' | 'attach' | 'omit', omitContent?: boolean } = {}) {
const harPath = testInfo.outputPath(options.outputPath || 'test.har');
const context = await contextFactory({ recordHar: { path: harPath, content: options.content, omitContent: options.omitContent }, ignoreHTTPSErrors: true });
const page = await context.newPage();
return {
page,
@ -33,7 +34,11 @@ async function pageWithHar(contextFactory: (options?: BrowserContextOptions) =>
getLog: async () => {
await context.close();
return JSON.parse(fs.readFileSync(harPath).toString())['log'] as Log;
}
},
getZip: async () => {
await context.close();
return parseHar(harPath);
},
};
}
@ -270,6 +275,80 @@ it('should include content @smoke', async ({ contextFactory, server }, testInfo)
expect(log.entries[2].response.content.compression).toBe(0);
});
// Records into a `.zip` path without an explicit content policy and checks that
// the `har.har` entry inside the archive carries the response bodies inline.
it('should include content in zip', async ({ contextFactory, server }, testInfo) => {
  const session = await pageWithHar(contextFactory, testInfo, { outputPath: 'test.har.zip' });
  await session.page.goto(server.PREFIX + '/har.html');
  await session.page.evaluate(() => fetch('/pptr.png').then(r => r.arrayBuffer()));
  const archive = await session.getZip();
  const harJson = archive.get('har.har').toString();
  const log = JSON.parse(harJson)['log'] as Log;

  // First entry: the HTML document, stored as plain text.
  const htmlContent = log.entries[0].response.content;
  expect(htmlContent.encoding).toBe(undefined);
  expect(htmlContent.mimeType).toBe('text/html; charset=utf-8');
  expect(htmlContent.text).toContain('HAR Page');
  expect(htmlContent.size).toBeGreaterThanOrEqual(96);
  expect(htmlContent.compression).toBe(0);

  // Second entry: the stylesheet, stored as plain text.
  const cssContent = log.entries[1].response.content;
  expect(cssContent.encoding).toBe(undefined);
  expect(cssContent.mimeType).toBe('text/css; charset=utf-8');
  expect(cssContent.text).toContain('pink');
  expect(cssContent.size).toBeGreaterThanOrEqual(37);
  expect(cssContent.compression).toBe(0);

  // Third entry: the fetched image, stored base64-encoded.
  const pngContent = log.entries[2].response.content;
  expect(pngContent.encoding).toBe('base64');
  expect(pngContent.mimeType).toBe('image/png');
  expect(Buffer.from(pngContent.text, 'base64').byteLength).toBeGreaterThan(0);
  expect(pngContent.size).toBeGreaterThanOrEqual(6000);
  expect(pngContent.compression).toBe(0);
});
// With `content: 'omit'` the first entry must carry neither inline text nor a `_sha1` reference.
it('should omit content', async ({ contextFactory, server }, testInfo) => {
  const session = await pageWithHar(contextFactory, testInfo, { content: 'omit', outputPath: 'test.har' });
  await session.page.goto(server.PREFIX + '/har.html');
  await session.page.evaluate(() => fetch('/pptr.png').then(r => r.arrayBuffer()));
  const log = await session.getLog();
  const firstContent = log.entries[0].response.content;
  expect(firstContent.text).toBe(undefined);
  expect(firstContent._sha1).toBe(undefined);
});
// Same expectation as the `content: 'omit'` case, driven by the older `omitContent: true` flag.
it('should omit content legacy', async ({ contextFactory, server }, testInfo) => {
  const session = await pageWithHar(contextFactory, testInfo, { omitContent: true, outputPath: 'test.har' });
  await session.page.goto(server.PREFIX + '/har.html');
  await session.page.evaluate(() => fetch('/pptr.png').then(r => r.arrayBuffer()));
  const log = await session.getLog();
  const firstContent = log.entries[0].response.content;
  expect(firstContent.text).toBe(undefined);
  expect(firstContent._sha1).toBe(undefined);
});
// With `content: 'attach'` each entry references its body through `_sha1`, and the
// body itself is expected as a separate file inside the archive.
it('should attach content', async ({ contextFactory, server }, testInfo) => {
  const session = await pageWithHar(contextFactory, testInfo, { content: 'attach', outputPath: 'test.har.zip' });
  await session.page.goto(server.PREFIX + '/har.html');
  await session.page.evaluate(() => fetch('/pptr.png').then(r => r.arrayBuffer()));
  const archive = await session.getZip();
  const harJson = archive.get('har.har').toString();
  const log = JSON.parse(harJson)['log'] as Log;

  // First entry: the HTML document.
  const htmlContent = log.entries[0].response.content;
  expect(htmlContent.encoding).toBe(undefined);
  expect(htmlContent.mimeType).toBe('text/html; charset=utf-8');
  expect(htmlContent._sha1).toContain('75841480e2606c03389077304342fac2c58ccb1b');
  expect(htmlContent.size).toBeGreaterThanOrEqual(96);
  expect(htmlContent.compression).toBe(0);

  // Second entry: the stylesheet.
  const cssContent = log.entries[1].response.content;
  expect(cssContent.encoding).toBe(undefined);
  expect(cssContent.mimeType).toBe('text/css; charset=utf-8');
  expect(cssContent._sha1).toContain('79f739d7bc88e80f55b9891a22bf13a2b4e18adb');
  expect(cssContent.size).toBeGreaterThanOrEqual(37);
  expect(cssContent.compression).toBe(0);

  // Third entry: the fetched image.
  const pngContent = log.entries[2].response.content;
  expect(pngContent.encoding).toBe(undefined);
  expect(pngContent.mimeType).toBe('image/png');
  expect(pngContent._sha1).toContain('a4c3a18f0bb83f5d9fe7ce561e065c36205762fa');
  expect(pngContent.size).toBeGreaterThanOrEqual(6000);
  expect(pngContent.compression).toBe(0);

  // The referenced bodies must exist as separate files in the archive.
  expect(archive.get('75841480e2606c03389077304342fac2c58ccb1b.html').toString()).toContain('HAR Page');
  expect(archive.get('79f739d7bc88e80f55b9891a22bf13a2b4e18adb.css').toString()).toContain('pink');
  expect(archive.get('a4c3a18f0bb83f5d9fe7ce561e065c36205762fa.png').byteLength).toBe(log.entries[2].response.content.size);
});
it('should filter by glob', async ({ contextFactory, server }, testInfo) => {
const harPath = testInfo.outputPath('test.har');
const context = await contextFactory({ baseURL: server.PREFIX, recordHar: { path: harPath, urlFilter: '/*.css' }, ignoreHTTPSErrors: true });
@ -597,11 +676,11 @@ it('should filter favicon and favicon redirects', async ({ server, browserName,
});
it('should have different hars for concurrent contexts', async ({ contextFactory }, testInfo) => {
const session0 = await pageWithHar(contextFactory, testInfo, 'test-0.har');
const session0 = await pageWithHar(contextFactory, testInfo, { outputPath: 'test-0.har' });
await session0.page.goto('data:text/html,<title>Zero</title>');
await session0.page.waitForLoadState('domcontentloaded');
const session1 = await pageWithHar(contextFactory, testInfo, 'test-1.har');
const session1 = await pageWithHar(contextFactory, testInfo, { outputPath: 'test-1.har' });
await session1.page.goto('data:text/html,<title>One</title>');
await session1.page.waitForLoadState('domcontentloaded');