Commit 7bc90b2b authored by nanahira's avatar nanahira

first

parent a9eefc11
# compiled output
/dist
/node_modules
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# OS
.DS_Store
# Tests
/coverage
/.nyc_output
# IDEs and editors
/.idea
.project
.classpath
.c9/
*.launch
.settings/
*.sublime-workspace
# IDE - VSCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
/data
/output
/config.yaml
.git*
Dockerfile
.dockerignore
/tests
webpack.config.js
dist/*
build/*
*.js
// ESLint configuration: parses the sources with @typescript-eslint/parser and
// layers the typescript-eslint and Prettier recommended presets on top.
module.exports = {
parser: '@typescript-eslint/parser',
parserOptions: {
// tsconfig.json is resolved relative to the directory containing this file.
project: 'tsconfig.json',
tsconfigRootDir : __dirname,
sourceType: 'module',
},
plugins: ['@typescript-eslint/eslint-plugin'],
extends: [
'plugin:@typescript-eslint/recommended',
'plugin:prettier/recommended',
],
// Marks this as the root config so ESLint does not search parent directories.
root: true,
env: {
node: true,
jest: true,
},
// This file itself is excluded from linting.
ignorePatterns: ['.eslintrc.js'],
rules: {
// NOTE(review): 'interface-name-prefix' may no longer exist in the installed
// @typescript-eslint version; confirm and drop the entry if so.
'@typescript-eslint/interface-name-prefix': 'off',
'@typescript-eslint/explicit-function-return-type': 'off',
'@typescript-eslint/explicit-module-boundary-types': 'off',
'@typescript-eslint/no-explicit-any': 'off',
},
};
# compiled output
/dist
/node_modules
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# OS
.DS_Store
# Tests
/coverage
/.nyc_output
# IDEs and editors
/.idea
.project
.classpath
.c9/
*.launch
.settings/
*.sublime-workspace
# IDE - VSCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
/data
/output
/config.yaml
stages:
- build
- deploy
variables:
GIT_DEPTH: "1"
before_script:
  # Pass the registry password on stdin so it does not appear in the process
  # argument list and Docker does not warn about insecure CLI usage.
  - echo "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin "$CI_REGISTRY"
.build-image:
stage: build
script:
- docker build --pull -t $TARGET_IMAGE .
- docker push $TARGET_IMAGE
build-x86:
extends: .build-image
tags:
- docker
variables:
TARGET_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-x86
build-arm:
extends: .build-image
tags:
- docker-arm
variables:
TARGET_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-arm
.deploy:
stage: deploy
tags:
- docker
script:
- docker pull $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-x86
- docker pull $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-arm
- docker manifest create $TARGET_IMAGE --amend $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-x86 --amend
$CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG-arm
- docker manifest push $TARGET_IMAGE
deploy_latest:
extends: .deploy
variables:
TARGET_IMAGE: $CI_REGISTRY_IMAGE:latest
only:
- master
deploy_branch:
extends: .deploy
variables:
TARGET_IMAGE: $CI_REGISTRY_IMAGE:$CI_COMMIT_REF_SLUG
stages:
- install
- build
- deploy
variables:
GIT_DEPTH: "1"
npm_ci:
stage: install
tags:
- linux
script:
- npm ci
artifacts:
paths:
- node_modules
.build_base:
stage: build
tags:
- linux
dependencies:
- npm_ci
build:
extends: .build_base
script: npm run build
artifacts:
paths:
- dist/
upload_to_minio:
stage: deploy
dependencies:
- build
tags:
- linux
script:
- aws s3 --endpoint=https://minio.momobako.com:9000 sync --delete dist/ s3://nanahira/path
only:
- master
stages:
- install
- build
- deploy
variables:
GIT_DEPTH: "1"
npm_ci:
stage: install
tags:
- linux
script:
- npm ci
artifacts:
paths:
- node_modules
.build_base:
stage: build
tags:
- linux
dependencies:
- npm_ci
build:
extends:
- .build_base
script:
- npm run build
artifacts:
paths:
- dist/
unit-test:
extends:
- .build_base
script:
- npm run test
deploy_npm:
stage: deploy
dependencies:
- build
tags:
- linux
script:
- apt update;apt -y install coreutils
- echo $NPMRC | base64 --decode > ~/.npmrc
- npm publish . --access public && curl -X PUT "https://registry-direct.npmmirror.com/$(cat package.json | jq '.name' | sed 's/\"//g')/sync?sync_upstream=true" || true
only:
- master
/install-npm.sh
.git*
/data
/output
/config.yaml
.idea
.dockerignore
Dockerfile
/src
/coverage
/tests
/dist/tests
/build.js
\ No newline at end of file
{
"singleQuote": true,
"trailingComma": "all"
}
\ No newline at end of file
# Base stage: Node LTS on Debian trixie plus Google Chrome, fonts and Xvfb.
FROM node:lts-trixie-slim AS base
LABEL Author="Nanahira <nanahira@momobako.com>"
# Chrome is installed from Google's apt repository below, so Puppeteer is told
# not to download its own Chromium.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# The repository key is stored as a dedicated keyring and referenced with
# signed-by, because apt-key is not available on trixie.
RUN apt update && apt -y install curl ca-certificates gnupg2 && \
    curl -fsSL https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg && \
    echo 'deb [signed-by=/usr/share/keyrings/google-chrome.gpg] https://dl.google.com/linux/chrome/deb/ stable main' > /etc/apt/sources.list.d/google-chrome.list && \
    apt update && \
    apt -y install python3 build-essential git google-chrome-stable libnss3 libfreetype6-dev libharfbuzz-bin libharfbuzz-dev ca-certificates fonts-freefont-otf fonts-freefont-ttf fonts-noto-cjk fonts-noto-cjk-extra fonts-wqy-microhei fonts-wqy-zenhei xvfb && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* /var/log/*
WORKDIR /usr/src/app
COPY ./package*.json ./

# Builder stage: install all dependencies and compile into dist/.
FROM base AS builder
RUN npm ci && npm cache clean --force
COPY . ./
RUN npm run build

# Runtime stage: install with NODE_ENV=production and copy the built dist/.
FROM base
ENV NODE_ENV=production
RUN npm ci && npm cache clean --force
COPY --from=builder /usr/src/app/dist ./dist
CMD [ "npm", "start" ]
The MIT License (MIT)
Copyright (c) 2021 Nanahira
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
#!/usr/bin/env node
/* build.js - node build.js [all|cjs|esm|types|clean] */
const fs = require('fs');
const path = require('path');
const { builtinModules } = require('module');
const DIST_DIR = 'dist';
/* ------------------------- utils ------------------------- */
/**
 * Read and parse a JSON file, resolved against the current working directory.
 *
 * Never throws: any read or parse failure yields `fallback`. A missing file is
 * treated as expected and stays silent; every other failure (unreadable file,
 * invalid JSON) is logged so a broken package.json does not go unnoticed.
 *
 * @param {string} file - Path of the JSON file (relative paths use process.cwd()).
 * @param {*} [fallback={}] - Value returned when the file cannot be read or parsed.
 * @returns {*} The parsed JSON value, or `fallback`.
 */
function readJSONSafe(file, fallback = {}) {
  const resolved = path.resolve(process.cwd(), file);
  let text;
  try {
    text = fs.readFileSync(resolved, 'utf8');
  } catch (err) {
    if (!err || err.code !== 'ENOENT') {
      console.warn(`[build] Cannot read ${resolved}: ${err && err.message}`);
    }
    return fallback;
  }
  try {
    return JSON.parse(text);
  } catch (err) {
    console.warn(`[build] Invalid JSON in ${resolved}: ${err && err.message}`);
    return fallback;
  }
}
/**
 * Remove duplicates from an iterable, keeping first-occurrence order.
 *
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} A new array with each distinct value once.
 */
function uniq(arr) {
  return [...new Set(arr)];
}
/**
 * Create directory `p`, including missing parents; an existing directory is fine.
 *
 * @param {string} p - Directory path to create.
 */
function ensureDir(p) {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(p, mkdirOptions);
}
/**
 * Recursively delete `p` if it exists; does nothing when the path is absent.
 *
 * @param {string} p - File or directory path to remove.
 */
function rimraf(p) {
  if (!fs.existsSync(p)) {
    return;
  }
  fs.rmSync(p, { recursive: true, force: true });
}
/**
 * Build the esbuild `external` patterns for a package's runtime dependencies.
 *
 * @param {object} pkg - Parsed package.json content.
 * @returns {string[]} Each dependency and peer dependency name, followed by a
 *   `name/*` pattern so sub-path imports (lodash/fp, react/jsx-runtime) are
 *   external as well.
 */
function depsAsExternal(pkg) {
  const packageNames = uniq(
    ['dependencies', 'peerDependencies'].flatMap((field) =>
      Object.keys(pkg[field] || {}),
    ),
  );
  const patterns = [];
  for (const packageName of packageNames) {
    patterns.push(packageName, `${packageName}/*`);
  }
  return uniq(patterns);
}
/**
 * List Node.js built-in modules in both bare ('fs') and prefixed ('node:fs') form.
 *
 * @returns {string[]} De-duplicated module specifiers.
 */
function nodeBuiltinsExternal() {
  const prefixed = builtinModules.map((name) => `node:${name}`);
  return uniq(builtinModules.concat(prefixed));
}
/**
 * Load esbuild, trying `require` first and falling back to dynamic `import()`.
 *
 * @returns {Promise<object>} The module object that exposes `build` (the
 *   namespace itself when it has `build`, otherwise its default export).
 */
async function loadEsbuild() {
  try {
    return require('esbuild');
  } catch {
    const imported = await import('esbuild');
    if (imported.build) {
      return imported;
    }
    return imported.default;
  }
}
/**
 * Locate tsconfig.json in the current working directory.
 *
 * @returns {string|undefined} 'tsconfig.json' when the file exists, otherwise undefined.
 */
function tsconfigPath() {
  const candidate = 'tsconfig.json';
  if (fs.existsSync(candidate)) {
    return candidate;
  }
  return undefined;
}
/**
 * Entry points handed to esbuild. The package manifest is currently not
 * consulted; the single entry is always index.ts.
 *
 * @returns {string[]} The list of entry files.
 */
function entryPointsFromPkg(/* pkg */) {
  const entries = ['index.ts'];
  return entries;
}
/* ------------------------- esbuild builds ------------------------- */
/**
 * Bundle the entry points with esbuild into a single file for one module format.
 *
 * 'cjs' writes dist/index.cjs (platform node, target es2021); any other format
 * writes dist/index.mjs (platform neutral, target esnext).
 *
 * @param {string} format - esbuild output format, 'cjs' or 'esm'.
 * @param {{external: string[], tsconfig: (string|undefined), entryPoints: string[]}} options
 *   Externals (deps + peer deps + Node built-ins), optional tsconfig path and entry files.
 * @returns {Promise<void>} Resolves when esbuild has finished.
 */
async function buildOne(format, options) {
  const esbuild = await loadEsbuild();
  const { external, tsconfig, entryPoints } = options;

  const profile =
    format === 'cjs'
      ? { file: 'index.cjs', platform: 'node', target: 'es2021' }
      : { file: 'index.mjs', platform: 'neutral', target: 'esnext' };
  const outfile = path.join(DIST_DIR, profile.file);

  ensureDir(path.dirname(outfile));
  console.log(`[build] ${format} -> ${outfile}`);

  const buildOptions = {
    entryPoints,
    outfile,
    bundle: true,
    sourcemap: true,
    format,
    platform: profile.platform,
    target: profile.target,
    external,
    logLevel: 'info',
  };
  // Only pass tsconfig when one was found.
  if (tsconfig) {
    buildOptions.tsconfig = tsconfig;
  }
  await esbuild.build(buildOptions);
}
/* ------------------------- types via TypeScript API ------------------------- */
/**
 * Emit .d.ts declaration files through the TypeScript compiler API.
 *
 * Uses tsconfig.json when one is found; otherwise falls back to compiling
 * index.ts with a small set of default options. Declaration-only output into
 * `outDir` is forced regardless of the configuration.
 *
 * Diagnostics (including problems found while parsing tsconfig.json) are
 * printed. Type errors alone do not fail the build; the function throws only
 * when tsconfig.json cannot be read or when TypeScript skipped emitting.
 *
 * @param {string} [outDir=DIST_DIR] - Directory receiving the declarations.
 * @throws {Error} When tsconfig.json is unreadable or emit was skipped.
 */
function buildTypesAPI(outDir = DIST_DIR) {
  let ts;
  try {
    ts = require('typescript');
  } catch {
    console.error('[types] Missing dependency: typescript');
    process.exit(1);
  }

  const formatHost = {
    getCanonicalFileName: (p) => p,
    getCurrentDirectory: ts.sys.getCurrentDirectory,
    getNewLine: () => ts.sys.newLine,
  };
  const report = (diags) =>
    console.error(ts.formatDiagnosticsWithColorAndContext(diags, formatHost));

  const cfgPath = ts.findConfigFile('./', ts.sys.fileExists, 'tsconfig.json');
  let fileNames;
  let options;
  let configDiagnostics = [];
  if (cfgPath) {
    // Load tsconfig.json; a read/syntax failure leaves `config` unusable, so stop here.
    const { config, error } = ts.readConfigFile(cfgPath, ts.sys.readFile);
    if (error) {
      report([error]);
      throw new Error(`[types] Cannot read ${cfgPath}.`);
    }
    const parsed = ts.parseJsonConfigFileContent(config, ts.sys, path.dirname(cfgPath));
    fileNames = parsed.fileNames;
    options = parsed.options;
    // Invalid options / empty include sets are reported here, not by the program.
    configDiagnostics = parsed.errors;
  } else {
    // No tsconfig: degrade to index.ts only.
    console.warn('[types] tsconfig.json not found; fallback to index.ts with basic options.');
    fileNames = ['index.ts'].filter((f) => fs.existsSync(f));
    options = {
      moduleResolution: 99, // NodeNext (numeric value, avoids importing the enum)
      target: 99, // ESNext
      skipLibCheck: true,
      strict: true,
    };
  }

  // Force declaration-only output.
  options.declaration = true;
  options.emitDeclarationOnly = true;
  options.outDir = outDir;
  // Keep emitting even when diagnostics exist.
  options.noEmitOnError = false;

  console.log('[types] Generating .d.ts ...');
  const program = ts.createProgram(fileNames, options);
  const pre = ts.getPreEmitDiagnostics(program);
  const emitResult = program.emit();
  const diagnostics = [...configDiagnostics, ...pre, ...emitResult.diagnostics];

  if (diagnostics.length) {
    report(diagnostics);
    if (emitResult.emitSkipped) {
      throw new Error('[types] Type generation failed.');
    }
  }
  console.log('[types] Declarations generated.');
}
/* ------------------------- main dispatcher ------------------------- */
/*
 * Entry point: `node build.js [all|cjs|esm|types|clean]`.
 * A missing or unrecognised sub-command runs the full pipeline
 * (clean, cjs, esm, types). Any failure is logged and exits with code 1.
 */
(async function main() {
  const command = (process.argv[2] || 'all').toLowerCase();

  const pkg = readJSONSafe('package.json');
  // Shared externals: dependencies + peer dependencies + Node built-ins (incl. node:*).
  const external = uniq([...depsAsExternal(pkg), ...nodeBuiltinsExternal()]);
  const tsconfig = tsconfigPath();
  const entryPoints = entryPointsFromPkg(pkg);

  const bundle = (format) => buildOne(format, { external, tsconfig, entryPoints });
  const clean = () => {
    console.log('[clean] remove dist/');
    rimraf(DIST_DIR);
  };
  const buildEverything = async () => {
    clean();
    await bundle('cjs');
    await bundle('esm');
    buildTypesAPI(DIST_DIR);
    console.log('[build] Done.');
  };

  const handlers = new Map([
    ['clean', async () => clean()],
    ['cjs', () => bundle('cjs')],
    ['esm', () => bundle('esm')],
    [
      'types',
      async () => {
        ensureDir(DIST_DIR);
        buildTypesAPI(DIST_DIR);
      },
    ],
  ]);

  const run = handlers.get(command) ?? buildEverything;
  await run();
})().catch((err) => {
  console.error('[build] Failed:', err);
  process.exit(1);
});
export * from './src/puppeteer-worker';
export * from './src/create-puppeteer-worker';
export * from './src/page-request-waiter';
#!/bin/bash
# Installs the development toolchain (ESLint, TypeScript, Prettier, Jest, esbuild).

# ESLint is pinned to an exact version.
npm i --save-exact --save-dev eslint@8.22.0

# Each package spec is its own quoted word followed by " \" so that no spec
# carries stray whitespace or gets glued to the next line.
npm install --save-dev \
  @types/node \
  typescript \
  '@typescript-eslint/eslint-plugin@^6.0.0' \
  '@typescript-eslint/parser@^6.0.0' \
  'eslint-config-prettier@^9.0.0' \
  'eslint-plugin-prettier@^5.0.0' \
  prettier \
  jest \
  @types/jest \
  ts-jest \
  rimraf \
  esbuild \
  esbuild-register
This diff is collapsed.
{
"name": "puppeteer-worker",
"description": "Puppeteer worker wrapped",
"version": "1.0.0",
"main": "dist/index.cjs",
"module": "dist/index.mjs",
"types": "dist/index.d.ts",
"exports": {
  ".": {
    "types": "./dist/index.d.ts",
    "import": "./dist/index.mjs",
    "require": "./dist/index.cjs"
  }
},
"scripts": {
"lint": "eslint --fix .",
"build": "node build.js",
"build:cjs": "node build.js cjs",
"build:esm": "node build.js esm",
"build:types": "node build.js types",
"clean": "node build.js clean",
"test": "jest --passWithNoTests --runInBand",
"start": "node dist/index.js"
},
"repository": {
"type": "git",
"url": "https://code.moenext.com/nanahira/puppeteer-worker.git"
},
"author": "Nanahira <nanahira@momobako.com>",
"license": "MIT",
"keywords": [],
"bugs": {
"url": "https://code.moenext.com/nanahira/puppeteer-worker/issues"
},
"homepage": "https://code.moenext.com/nanahira/puppeteer-worker",
"jest": {
"moduleFileExtensions": [
"js",
"json",
"ts"
],
"rootDir": "tests",
"testRegex": ".*\\.spec\\.ts$",
"transform": {
"^.+\\.(t|j)s$": "ts-jest"
},
"collectCoverageFrom": [
"**/*.(t|j)s"
],
"coverageDirectory": "../coverage",
"testEnvironment": "node"
},
"devDependencies": {
"@types/cookie": "0.6.0",
"@types/jest": "^30.0.0",
"@types/node": "^24.9.2",
"@typescript-eslint/eslint-plugin": "^6.21.0",
"@typescript-eslint/parser": "^6.21.0",
"esbuild": "^0.25.11",
"esbuild-register": "^3.6.0",
"eslint": "8.22.0",
"eslint-config-prettier": "^9.1.2",
"eslint-plugin-prettier": "^5.5.4",
"jest": "^30.2.0",
"prettier": "^3.6.2",
"rimraf": "^6.1.0",
"ts-jest": "^29.4.5",
"typescript": "^5.9.3"
},
"dependencies": {
"better-lock": "^3.2.0",
"cookie": "0.6.0",
"puppeteer": "^24.27.0",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"puppeteer-finder": "^1.1.1"
}
}
import { PuppeteerWorker, PuppeteerWorkerOptions } from './puppeteer-worker';
/**
 * Create a PuppeteerWorker and wait until its browser has been launched.
 *
 * The constructor already starts initialisation, so this awaits that same
 * `initPromise` instead of calling `init()` a second time (which would launch
 * another browser and close the first one).
 *
 * @param options Worker options; defaults to `{}`.
 * @returns A promise resolving to the initialised worker.
 */
export const createPuppeteerWorker = (options: PuppeteerWorkerOptions = {}) =>
  new PuppeteerWorker(options).initPromise;
import puppeteer from 'puppeteer-extra';
import StealthPlugin from 'puppeteer-extra-plugin-stealth';
import find from 'puppeteer-finder';
import * as process from 'node:process';
import * as fs from 'node:fs';
// Register the stealth plugin on the shared puppeteer-extra instance at module load.
puppeteer.use(StealthPlugin());
// Executable used for every launch: on Linux the google-chrome-stable binary is
// used when it exists at /usr/bin; otherwise the path comes from puppeteer-finder.
const chromePath =
process.platform === 'linux' && fs.existsSync('/usr/bin/google-chrome-stable')
? '/usr/bin/google-chrome-stable'
: find();
/**
 * Launch a non-headless Chrome through puppeteer-extra with a fixed flag set.
 *
 * NOTE(review): the flag set disables the sandbox, web security and
 * certificate validation; confirm this is acceptable for the pages visited.
 *
 * @param extraArgs Additional Chrome command-line flags, appended after the built-in ones.
 * @returns The promise returned by `puppeteer.launch`.
 */
export async function createPuppeteer(extraArgs: string[] = []) {
  const baseFlags = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-infobars',
    '--disable-dev-shm-usage',
    '--disable-blink-features=AutomationControlled',
    '--ignore-certificate-errors',
    '--no-first-run',
    '--no-service-autorun',
    '--password-store=basic',
    '--system-developer-mode',
  ];
  // Flags intended to reduce memory usage. Deliberately left out:
  // --single-process, --disable-gpu, --js-flags="--max-old-space-size=1024".
  const memoryFlags = [
    '--mute-audio',
    '--disable-default-apps',
    '--no-zygote',
    '--disable-accelerated-2d-canvas',
    '--disable-web-security',
  ];
  // Puppeteer default arguments that must NOT be passed to Chrome.
  const suppressedDefaults = [
    '--disable-extensions',
    '--enable-automation',
    '--disable-component-extensions-with-background-pages',
  ];

  return puppeteer.launch({
    headless: false,
    args: [...baseFlags, ...memoryFlags, ...extraArgs],
    ignoreDefaultArgs: suppressedDefaults,
    executablePath: chromePath,
  });
}
import {
AwaitablePredicate,
HTTPResponse,
Page,
WaitTimeoutOptions,
} from 'puppeteer';
/**
 * Fans a page's 'response' events out to temporary listeners so callers can
 * trigger an action and then wait for the matching HTTP response.
 */
export class PageRequestWaiter {
  /** Active listeners keyed by an incrementing id. */
  private responseCallbacks: Map<
    number,
    (response: HTTPResponse) => void | Promise<void>
  > = new Map();
  private listenerIdCounter = 0;

  /** Single 'response' handler registered on the page; dispatches to all listeners. */
  private pageHandler = (response: HTTPResponse) => {
    for (const [id, callback] of this.responseCallbacks.entries()) {
      const report = (e: unknown) =>
        console.error(`Error in response callback for listener ${id}:`, e);
      try {
        // Listeners may be async: route their rejections to the same logger
        // instead of letting them become unhandled rejections.
        Promise.resolve(callback(response)).catch(report);
      } catch (e) {
        report(e);
      }
    }
  };

  /**
   * @param page Page whose 'response' events are observed; listening starts immediately.
   */
  constructor(private readonly page: Page) {
    this.register();
  }

  private register() {
    this.page.on('response', this.pageHandler);
  }

  private unregister() {
    this.page.off('response', this.pageHandler);
  }

  /** Stop listening to the page and drop every registered listener. */
  dispose() {
    this.unregister();
    this.responseCallbacks.clear();
  }

  /**
   * Waits for a specific response and executes a callback during the waiting period.
   *
   * The listener is installed before `cb` runs, so a response produced by `cb`
   * itself is not missed. `cb` is awaited first, then the matching response.
   *
   * @param reqMatch A string (URL suffix) or a predicate function to match the response.
   * @param cb A callback function to execute during the waiting period.
   * @param options Optional settings: `timeout` in ms (default 30000) and
   *   `noThrow` to resolve with a `null` response on timeout instead of rejecting.
   * @returns An object containing the callback result and the matched HTTP
   *   response (`null` only when `noThrow` is set and the wait timed out).
   * @throws Error 'Timeout waiting for matching response' on timeout without
   *   `noThrow`; anything thrown by `cb` is propagated unchanged.
   */
  async waitForRequestAfter<T>(
    reqMatch: string | AwaitablePredicate<HTTPResponse>,
    cb: () => Promise<T>,
    options?: WaitTimeoutOptions & { noThrow?: boolean },
  ): Promise<{ result: T; response: HTTPResponse | null }> {
    const predicate: AwaitablePredicate<HTTPResponse> =
      typeof reqMatch === 'string'
        ? (response) => response.url().endsWith(reqMatch)
        : reqMatch;
    const timeout = options?.timeout ?? 30000; // Default timeout to 30 seconds
    const listenerId = this.listenerIdCounter++;

    let timer: ReturnType<typeof setTimeout> | undefined;
    const responsePromise = new Promise<HTTPResponse | null>(
      (resolve, reject) => {
        timer = setTimeout(() => {
          // Always settle, even if dispose() already removed the listener,
          // so a pending waiter can never hang forever.
          this.responseCallbacks.delete(listenerId);
          if (options?.noThrow) {
            resolve(null);
          } else {
            reject(new Error('Timeout waiting for matching response'));
          }
        }, timeout);

        this.responseCallbacks.set(listenerId, async (response) => {
          if (await predicate(response)) {
            clearTimeout(timer);
            this.responseCallbacks.delete(listenerId);
            resolve(response);
          }
        });
      },
    );
    // The timeout may reject while `cb` is still running and nobody is awaiting
    // the promise yet; mark it as handled so that does not surface as an
    // unhandled rejection. The `await` below still observes the rejection.
    responsePromise.catch(() => undefined);

    try {
      const result = await cb();
      const response = await responsePromise;
      return {
        result,
        response,
      };
    } finally {
      // Release the timer and the listener on every exit path (including cb throwing).
      clearTimeout(timer);
      this.responseCallbacks.delete(listenerId);
    }
  }
}
import { Browser, GoToOptions, Page } from 'puppeteer';
import BetterLock from 'better-lock';
import { createPuppeteer } from './create-puppeteer';
import * as cookie from 'cookie';
import { createDeferred } from './utility/defer';
import { PageRequestWaiter } from './page-request-waiter';
/** Options accepted by the PuppeteerWorker constructor. */
export interface PuppeteerWorkerOptions {
// Optional proxy, written as a URL string (it is parsed with `new URL`).
proxy?: string; // url format
}
/**
 * Format a proxy URL as `protocol//host:port`, leaving out credentials, path
 * and query.
 *
 * @param u Parsed proxy URL.
 * @returns The `protocol//host` string (protocol falls back to 'http:' when empty).
 */
function buildProxyArg(u: URL) {
  const { protocol, host } = u; // host already includes the port, if any
  const scheme = protocol || 'http:';
  return [scheme, host].join('//');
}
/**
 * Owns one browser instance and hands out short-lived pages.
 *
 * The browser is launched from the constructor; `initPromise` resolves once it
 * is ready. `usePage` opens a page, optionally pre-loads cookies / localStorage
 * and navigates, runs a callback and always closes the page afterwards.
 */
export class PuppeteerWorker {
  /** Current browser; unset before the first launch and after `close()`. */
  browser: Browser;
  /** Resolves to this worker once the browser launched by `init()` is ready. */
  initPromise: Promise<this>;
  private lock = new BetterLock();
  /** One promise per page session currently in progress. */
  private sessions = new Set<Promise<void>>();
  private readonly proxyUrl: URL | undefined;

  /**
   * @param options Worker options; `options.proxy` must be a parseable URL when given.
   */
  constructor(private options: PuppeteerWorkerOptions = {}) {
    // Assigned here (not in a field initializer) so it never depends on the
    // order in which parameter properties and fields are initialised.
    this.proxyUrl = options.proxy ? new URL(options.proxy) : undefined;
    this.initPromise = this.init();
  }

  /** Decode a percent-encoded URL credential; returns the raw text if it is malformed. */
  private static decodeCredential(value: string): string {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  }

  /** Wait until every session registered via `trackSession` has ended. */
  private async waitForSessionsToEnd(): Promise<void> {
    if (this.sessions.size === 0) return;
    // Our own bookkeeping is authoritative: wait for all registered sessions.
    await Promise.allSettled([...this.sessions]);
  }

  /**
   * Register a running session.
   * @returns A function that marks the session as finished and unregisters it.
   */
  private trackSession(): () => void {
    const d = createDeferred();
    this.sessions.add(d.promise);
    return () => {
      // End the session and remove it from the set.
      try {
        d.resolve();
      } finally {
        this.sessions.delete(d.promise);
      }
    };
  }

  /** Close the browser if there is one. Errors while closing are ignored. */
  async close() {
    if (this.browser) {
      try {
        await this.browser.close();
      } catch (e) {
        // Best effort: the browser may already be gone.
      }
      this.browser = undefined!;
    }
  }

  /** Close any existing browser and launch a new one (serialised by a lock). */
  private async createBrowser() {
    return this.lock.acquire('createBrowser', async () => {
      await this.close();
      const extraArgs: string[] = [];
      if (this.proxyUrl) {
        extraArgs.push(`--proxy-server=${buildProxyArg(this.proxyUrl)}`);
      }
      this.browser = await createPuppeteer(extraArgs);
    });
  }

  /**
   * Launch the browser. Idempotent: repeated calls share the same launch
   * instead of closing and relaunching the browser. If the launch fails, the
   * next call starts a fresh attempt.
   *
   * @returns A promise resolving to this worker once the browser is ready.
   */
  init(): Promise<this> {
    if (!this.initPromise) {
      const attempt: Promise<this> = this.createBrowser().then(
        () => this,
        (error) => {
          // Forget the failed attempt so a later init() can retry.
          if (this.initPromise === attempt) {
            this.initPromise = undefined!;
          }
          throw error;
        },
      );
      this.initPromise = attempt;
    }
    return this.initPromise;
  }

  /**
   * Open a new page. If the browser cannot create one, wait for running
   * sessions to finish, relaunch the browser and try once more.
   */
  async newPage() {
    await this.init();
    return this.lock.acquire('newPage', async () => {
      let page: Page;
      try {
        page = await this.browser.newPage();
      } catch (e) {
        await this.waitForSessionsToEnd();
        await this.createBrowser();
        page = await this.browser.newPage();
      }
      return page;
    });
  }

  /**
   * Set every cookie of a `name=value; ...` string on the page's browser context.
   * The cookie domain is '.' + the last two labels of the hostname.
   * NOTE(review): that is wrong for IP hosts and multi-label public suffixes
   * such as example.co.uk — confirm whether those need support.
   */
  private async injectCookies(page: Page, url: URL, cookieHeader: string) {
    const domain = url.hostname;
    const domainParts = domain.split('.');
    const cookieDomain =
      '.' + (domainParts.length > 2 ? domainParts.slice(-2).join('.') : domain);
    const context = page.browserContext();
    await context.setCookie(
      ...Object.entries(cookie.parse(cookieHeader)).map(([name, value]) => ({
        name,
        value,
        domain: cookieDomain,
      })),
    );
  }

  /**
   * Load `origin` with JavaScript disabled and write `data` into its localStorage.
   *
   * @param data An object, a JSON object string, or an already base64-encoded
   *   JSON string (any string not wrapped in `{...}` is treated as base64).
   */
  private async seedLocalStorage(
    page: Page,
    origin: string,
    data: any,
    timeout: number,
  ) {
    await page.setJavaScriptEnabled(false);
    await page.goto(origin, {
      waitUntil: 'domcontentloaded',
      timeout,
    });
    const localStorageEncoded =
      typeof data === 'string'
        ? data.startsWith('{') && data.endsWith('}')
          ? Buffer.from(data).toString('base64')
          : data
        : Buffer.from(JSON.stringify(data)).toString('base64');
    await page.evaluate((encodedLocalStorage) => {
      const sanitizedLocalStorage = encodedLocalStorage.replace(/^'|'$/g, '');
      const decodedData = new Uint8Array(
        atob(decodeURIComponent(sanitizedLocalStorage))
          .split('')
          .map((char) => char.charCodeAt(0)),
      );
      const jsonString = new TextDecoder().decode(decodedData);
      const localStorageData = JSON.parse(jsonString);
      for (const [key, value] of Object.entries(localStorageData)) {
        localStorage.setItem(key, value as string);
      }
    }, localStorageEncoded);
    await page.setJavaScriptEnabled(true);
  }

  /**
   * Run `cb` with a fresh page and a PageRequestWaiter bound to it. The page is
   * always closed afterwards, whether `cb` succeeds or throws.
   *
   * @param cb Work to perform with the page.
   * @param open Optional preparation: cookies and localStorage are injected
   *   first, then the page navigates to `open.url`. `open.timeout` (default
   *   120000 ms) applies to both navigations; `open.otherOptions` can override it.
   * @returns Whatever `cb` returns.
   */
  async usePage<T>(
    cb: (page: Page, requestWaiter: PageRequestWaiter) => T | Promise<T>,
    open?: {
      url: string;
      cookie?: string;
      localStorage?: any;
      timeout?: number;
      otherOptions?: GoToOptions;
    },
  ) {
    let page: Page | undefined;
    let endSession: (() => void) | undefined;
    let requestWaiter: PageRequestWaiter | undefined;
    try {
      page = await this.newPage();
      endSession = this.trackSession();
      if (this.proxyUrl?.username || this.proxyUrl?.password) {
        // URL exposes credentials percent-encoded; the proxy expects the raw values.
        await page.authenticate({
          username: PuppeteerWorker.decodeCredential(this.proxyUrl.username),
          password: PuppeteerWorker.decodeCredential(this.proxyUrl.password),
        });
      }
      if (open) {
        const timeout = open.timeout ?? 120_000;
        const urlObj = new URL(open.url);
        if (open.cookie) {
          await this.injectCookies(page, urlObj, open.cookie);
        }
        if (open.localStorage) {
          await this.seedLocalStorage(
            page,
            urlObj.origin,
            open.localStorage,
            timeout,
          );
        }
        await page.goto(open.url, {
          timeout,
          ...(open.otherOptions || {}),
        });
      }
      requestWaiter = new PageRequestWaiter(page);
      return await cb(page, requestWaiter);
    } finally {
      requestWaiter?.dispose();
      if (page) {
        try {
          await page.close();
        } catch (e) {
          // Best effort: the page or browser may already be closed.
        }
      }
      if (endSession) {
        try {
          endSession();
        } catch (e) {
          // Ending the session must never mask the callback's outcome.
        }
      }
    }
  }
}
/** A void promise bundled with the functions that settle it. */
type Deferred = {
// The promise settled by `resolve` / `reject` below.
promise: Promise<void>;
resolve: () => void;
reject: (e: unknown) => void;
};
/**
 * Create a promise that is settled from the outside.
 *
 * @returns The promise together with its `resolve` and `reject` functions.
 */
export function createDeferred(): Deferred {
  const settle = {} as Pick<Deferred, 'resolve' | 'reject'>;
  // The executor runs synchronously, so both functions are captured before returning.
  const promise = new Promise<void>((onFulfil, onReject) => {
    settle.resolve = onFulfil;
    settle.reject = onReject;
  });
  return { promise, ...settle };
}
/**
* tests/puppeteerWorker.integration.test.ts
*
* Integration tests for ../src/puppeteer-worker.ts
*
* Run serially to avoid conflicts:
* npx jest --runInBand tests/puppeteerWorker.integration.test.ts
*
* Environment variables:
* - SKIP_INTEGRATION=1 -> skip all tests
* - RUN_PROXY_TESTS=1 -> enable proxy test (default: skipped)
* - PROXY_URL -> proxy url to test with, e.g. "http://10.198.0.44:3128"
*
* Notes:
* - These are integration tests that will launch real Chromium via your project's
* create-puppeteer. Ensure your environment has Chrome/Chromium or create-puppeteer
* knows how to find it.
*/
import { PuppeteerWorker } from '../src/puppeteer-worker';
import path from 'path';
// Real browser launches and public sites are slow: allow two minutes per test.
jest.setTimeout(120_000);
// Set SKIP_INTEGRATION to any non-empty value to skip every test in this file.
const SKIP_INTEGRATION = !!process.env.SKIP_INTEGRATION;
// Proxy tests are opt-in: they run only when RUN_PROXY_TESTS is set, matching
// the file header and the skip message printed by the proxy test.
const RUN_PROXY_TESTS = !!process.env.RUN_PROXY_TESTS;
// Helper to short-circuit tests when SKIP_INTEGRATION is set
/**
 * Tells a test whether it should return early because SKIP_INTEGRATION is set.
 * Logs a warning each time it reports a skip.
 *
 * @returns true when integration tests are disabled, false otherwise.
 */
function skipIfIntegrationDisabled() {
  if (!SKIP_INTEGRATION) {
    return false;
  }
  // eslint-disable-next-line no-console
  console.warn(
    'SKIP_INTEGRATION set — skipping PuppeteerWorker integration tests.',
  );
  return true;
}
describe('PuppeteerWorker integration tests (serial)', () => {
  // Every worker launches a real browser; track them so each test's browser is
  // closed afterwards instead of leaking and keeping jest alive.
  const workers: PuppeteerWorker[] = [];
  const createWorker = (
    options?: ConstructorParameters<typeof PuppeteerWorker>[0],
  ) => {
    const worker = new PuppeteerWorker(options);
    workers.push(worker);
    return worker;
  };

  afterEach(async () => {
    for (const worker of workers.splice(0)) {
      // Let a still-running launch settle first so close() sees the browser.
      await Promise.resolve(worker.initPromise).catch(() => undefined);
      await worker.close();
    }
  });

  test('basic usage: open page and read title (baidu)', async () => {
    if (skipIfIntegrationDisabled()) return;
    const worker = createWorker();
    const title = await worker.usePage(async (page) => {
      await page.goto('https://www.baidu.com', {
        waitUntil: 'domcontentloaded',
      });
      return page.title();
    });
    // The Baidu title is Chinese; it should contain "百度".
    expect(title).toEqual(expect.stringContaining('百度'));
  });

  test('concurrent pages: multiple parallel usePage calls (baidu)', async () => {
    if (skipIfIntegrationDisabled()) return;
    const worker = createWorker();
    // Open three pages concurrently.
    const tasks = new Array(3).fill(0).map(() =>
      worker.usePage(async (page) => {
        await page.goto('https://www.baidu.com', {
          waitUntil: 'domcontentloaded',
        });
        return page.evaluate(() => document.location.href);
      }),
    );
    const results = await Promise.all(tasks);
    results.forEach((r) => {
      expect(r).toMatch(/^https?:\/\/(www\.)?baidu\.com/);
    });
  });

  test('cookie injection: context.setCookie is applied and visible in document.cookie (baidu)', async () => {
    if (skipIfIntegrationDisabled()) return;
    const worker = createWorker();
    const cookieStr = 'test_session=abc123; path=/;';
    const cookieSeen = await worker.usePage(
      async (page) => {
        // The worker injects the cookie before navigating.
        await page.goto('https://www.baidu.com', {
          waitUntil: 'domcontentloaded',
        });
        return page.evaluate(() => document.cookie);
      },
      {
        url: 'https://www.baidu.com',
        cookie: cookieStr,
      },
    );
    expect(cookieSeen).toEqual(expect.stringContaining('test_session=abc123'));
  });

  test('localStorage injection: provided object appears in localStorage (baidu)', async () => {
    if (skipIfIntegrationDisabled()) return;
    const worker = createWorker();
    const payload = { token: 'xyz-789', flag: '1' };
    const readBack = await worker.usePage(
      async (page) => {
        // The worker seeds localStorage and then navigates to the target url.
        return page.evaluate(() => {
          return {
            token: localStorage.getItem('token'),
            flag: localStorage.getItem('flag'),
          };
        });
      },
      {
        url: 'https://www.baidu.com',
        localStorage: payload,
      },
    );
    expect(readBack).toEqual({ token: 'xyz-789', flag: '1' });
  });

  test('proxy usage: visit ifconfig.me and page contains expected proxy IP', async () => {
    if (skipIfIntegrationDisabled()) return;
    if (!RUN_PROXY_TESTS) {
      // eslint-disable-next-line no-console
      console.warn('Skipping proxy test (set RUN_PROXY_TESTS=1 to enable).');
      return;
    }
    const proxyUrl = process.env.PROXY_URL || 'http://10.198.0.44:3128';
    const expectedIp = process.env.PROXY_EXPECTED_IP || '74.48.137.158';
    const worker = createWorker({ proxy: proxyUrl });
    // ifconfig.me answers with the caller's public IP in the page body.
    const bodyText = await worker.usePage(
      async (page) => {
        await page.goto('https://ifconfig.me', {
          waitUntil: 'domcontentloaded',
          timeout: 30000,
        });
        // Read innerText so both plain-text and HTML responses work.
        return page.evaluate(() =>
          document.body && document.body.innerText
            ? document.body.innerText
            : document.documentElement?.innerText || '',
        );
      },
      {
        url: 'https://ifconfig.me',
      },
    );
    // The body must contain the expected IP (surrounding whitespace allowed).
    expect(bodyText).toEqual(expect.stringContaining(expectedIp));
  });

  test('PageRequestWaiter: wait for a navigation response (httpbin /get)', async () => {
    if (skipIfIntegrationDisabled()) return;
    const worker = createWorker();
    const data = await worker.usePage(async (page, waiter) => {
      // Treat the navigation response for httpbin /get as the match target.
      const { result, response } = await waiter.waitForRequestAfter(
        '/get',
        async () => {
          await page.goto('https://httpbin.org/get', {
            waitUntil: 'domcontentloaded',
          });
          return 'navigated';
        },
        { timeout: 30000 },
      );
      expect(result).toBe('navigated');
      expect(response).not.toBeNull();
      expect(response!.ok()).toBe(true);
      const json = await response!.json();
      // httpbin /get returns JSON that echoes the requested url.
      expect(json).toHaveProperty('url');
      expect(json.url).toEqual('https://httpbin.org/get');
      return json;
    });
    expect(data).toHaveProperty('url', 'https://httpbin.org/get');
  });
});
{
"compilerOptions": {
"outDir": "dist",
"module": "commonjs",
"target": "es2021",
"esModuleInterop": true,
"emitDecoratorMetadata": true,
"experimentalDecorators": true,
"declaration": true,
"sourceMap": true
},
"compileOnSave": true,
"allowJs": true,
"include": [
"*.ts",
"src/**/*.ts",
"test/**/*.ts",
"tests/**/*.ts"
]
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment