Commit 2e0990e9 authored by Travis Fischer

feat: add ChatGPTAPIBrowser for increased robustness; less efficient, but fewer 429/403/503 errors

parent dd872320
import dotenv from 'dotenv-safe'
import { oraPromise } from 'ora'
import { ChatGPTAPI, getOpenAIAuth } from '../src'
import { ChatGPTAPIBrowser } from '../src'
dotenv.config()
......@@ -16,13 +16,9 @@ async function main() {
const email = process.env.OPENAI_EMAIL
const password = process.env.OPENAI_PASSWORD
const authInfo = await getOpenAIAuth({
email,
password
})
const api = new ChatGPTAPI({ ...authInfo })
await api.ensureAuth()
const api = new ChatGPTAPIBrowser({ email, password })
const res = await api.init()
console.log('init result', res)
const prompt =
'Write a python version of bubble sort. Do not include example usage.'
......@@ -31,6 +27,7 @@ async function main() {
text: prompt
})
await api.close()
return response
}
......
......@@ -10,6 +10,7 @@ specifiers:
dotenv-safe: ^8.2.0
eventsource-parser: ^0.0.5
expiry-map: ^2.0.0
html-to-md: ^0.8.3
husky: ^8.0.2
lint-staged: ^13.0.3
npm-run-all: ^4.1.5
......@@ -18,6 +19,7 @@ specifiers:
prettier: ^2.8.0
puppeteer: ^19.4.0
puppeteer-extra: ^3.3.4
puppeteer-extra-plugin-recaptcha: ^3.6.6
puppeteer-extra-plugin-stealth: ^2.11.1
remark: ^14.0.2
strip-markdown: ^5.0.0
......@@ -32,8 +34,10 @@ dependencies:
delay: 5.0.0
eventsource-parser: 0.0.5
expiry-map: 2.0.0
html-to-md: 0.8.3
p-timeout: 6.0.0
puppeteer-extra: 3.3.4_puppeteer@19.4.0
puppeteer-extra-plugin-recaptcha: 3.6.6_puppeteer-extra@3.3.4
puppeteer-extra-plugin-stealth: 2.11.1_puppeteer-extra@3.3.4
remark: 14.0.2
strip-markdown: 5.0.0
......@@ -1789,6 +1793,10 @@ packages:
lru-cache: 6.0.0
dev: true
/html-to-md/0.8.3:
resolution: {integrity: sha512-Va+bB1YOdD6vMRDue9/l7YxbERgwOgsos4erUDRfRN6YE0B2Wbbw8uAj6xZJk9A9vrjVy7mG/WLlhDw6RXfgsA==}
dev: false
/https-proxy-agent/5.0.1:
resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
engines: {node: '>= 6'}
......@@ -3065,6 +3073,26 @@ packages:
- supports-color
- utf-8-validate
/puppeteer-extra-plugin-recaptcha/3.6.6_puppeteer-extra@3.3.4:
resolution: {integrity: sha512-SVbmL+igGX8m0Qg9dn85trWDghbfUCTG/QUHYscYx5XgMZVVb0/v0a6MqbPdHoKmBx5BS2kLd6rorMlncMcXdw==}
engines: {node: '>=9.11.2'}
peerDependencies:
playwright-extra: '*'
puppeteer-extra: '*'
peerDependenciesMeta:
playwright-extra:
optional: true
puppeteer-extra:
optional: true
dependencies:
debug: 4.3.4
merge-deep: 3.0.3
puppeteer-extra: 3.3.4_puppeteer@19.4.0
puppeteer-extra-plugin: 3.2.2_puppeteer-extra@3.3.4
transitivePeerDependencies:
- supports-color
dev: false
/puppeteer-extra-plugin-stealth/2.11.1_puppeteer-extra@3.3.4:
resolution: {integrity: sha512-n0wdC0Ilc9tk5L6FWLyd0P2gT8b2fp+2NuB+KB0oTSw3wXaZ0D6WNakjJsayJ4waGzIJFCUHkmK9zgx5NKMoFw==}
engines: {node: '>=8'}
......
import delay from 'delay'
import html2md from 'html-to-md'
import { type Browser, type HTTPResponse, type Page } from 'puppeteer'
import * as types from './types'
import { getBrowser, getOpenAIAuth } from './openai-auth'
/**
 * Client wrapper that automates the ChatGPT webapp through a Puppeteer
 * browser: it signs in via `getOpenAIAuth`, types prompts into the page's
 * textarea and scrapes the rendered replies back out of the DOM.
 */
export class ChatGPTAPIBrowser {
  protected _markdown: boolean
  protected _debug: boolean
  protected _isGoogleLogin: boolean
  protected _captchaToken: string
  protected _email: string
  protected _password: string

  protected _browser: Browser
  protected _page: Page

  /**
   * Creates a new client wrapper for automating the ChatGPT webapp.
   *
   * No browser is launched here; call `init()` before using the instance.
   */
  constructor(opts: {
    email: string
    password: string

    /** @defaultValue `true` **/
    markdown?: boolean

    /** @defaultValue `false` **/
    debug?: boolean

    /** Forwarded to `getOpenAIAuth`. @defaultValue `false` **/
    isGoogleLogin?: boolean

    /** Forwarded to `getBrowser` when the browser is launched. **/
    captchaToken?: string
  }) {
    const {
      email,
      password,
      markdown = true,
      debug = false,
      isGoogleLogin = false,
      captchaToken
    } = opts

    this._email = email
    this._password = password

    this._markdown = !!markdown
    this._debug = !!debug
    this._isGoogleLogin = !!isGoogleLogin
    this._captchaToken = captchaToken
  }

  /**
   * Launches a fresh browser, runs the login flow and navigates to the chat
   * page, dismissing the welcome modal if one is open.
   *
   * @returns `true` if the chat input box is present afterwards (treated as
   * "signed in"), `false` otherwise.
   */
  async init(): Promise<boolean> {
    // Drop any browser left over from a previous `init()` call.
    await this.close()

    this._browser = await getBrowser({ captchaToken: this._captchaToken })
    this._page =
      (await this._browser.pages())[0] || (await this._browser.newPage())

    // bypass cloudflare and login
    await getOpenAIAuth({
      email: this._email,
      password: this._password,
      browser: this._browser,
      page: this._page,
      isGoogleLogin: this._isGoogleLogin
    })

    const chatUrl = 'https://chat.openai.com/chat'
    // Compare without a trailing slash so `/chat/` counts as already there.
    const url = this._page.url().replace(/\/$/, '')

    if (url !== chatUrl) {
      await this._page.goto(chatUrl, {
        waitUntil: 'networkidle0'
      })
    }

    // dismiss welcome modal: keep clicking its last button until the modal
    // disappears or the button can no longer be clicked
    const modalSelector = '[data-headlessui-state="open"]'
    while (await this._page.$(modalSelector)) {
      try {
        await this._page.click(`${modalSelector} button:last-child`)
      } catch (err) {
        // "next" button not found in welcome modal
        break
      }

      await delay(500)
    }

    // `getIsAuthenticated()` is async; without `await` the pending Promise is
    // always truthy and a failed login would still be reported as success.
    if (!(await this.getIsAuthenticated())) {
      return false
    }

    // this._page.on('response', this._onResponse.bind(this))
    return true
  }

  // _onResponse = (response: HTTPResponse) => {
  //   const request = response.request()

  //   console.log('response', {
  //     url: response.url(),
  //     ok: response.ok(),
  //     status: response.status(),
  //     statusText: response.statusText(),
  //     headers: response.headers(),
  //     request: {
  //       method: request.method(),
  //       headers: request.headers()
  //     }
  //   })
  // }

  /**
   * @returns `true` if the chat input box can be found on the current page,
   * `false` if it is missing or the lookup throws.
   */
  async getIsAuthenticated(): Promise<boolean> {
    try {
      const inputBox = await this._getInputBox()
      return !!inputBox
    } catch (err) {
      // can happen when navigating during login
      return false
    }
  }

  /**
   * @returns the most recent complete message, or `null` when there is none.
   */
  async getLastMessage(): Promise<string | null> {
    const messages = await this.getMessages()

    // An empty array is truthy, so check the length explicitly; indexing an
    // empty list would otherwise yield `undefined` instead of `null`.
    if (!messages?.length) {
      return null
    }

    return messages[messages.length - 1]
  }

  /**
   * @returns the text of every prompt currently rendered on the page.
   */
  async getPrompts(): Promise<string[]> {
    // Get all prompts
    const messages = await this._page.$$(
      '.text-base:has(.whitespace-pre-wrap):not(:has(button:nth-child(2))) .whitespace-pre-wrap'
    )

    // Prompts are always plaintext
    return Promise.all(messages.map((a) => a.evaluate((el) => el.textContent)))
  }

  /**
   * @returns every complete message on the page, converted to markdown when
   * the `markdown` option is enabled and returned as plain text otherwise.
   */
  async getMessages(): Promise<string[]> {
    // Get all complete messages
    // (in-progress messages that are being streamed back don't contain action buttons)
    const messages = await this._page.$$(
      '.text-base:has(.whitespace-pre-wrap):has(button:nth-child(2)) .whitespace-pre-wrap'
    )

    if (!this._markdown) {
      // plaintext
      return Promise.all(
        messages.map((a) => a.evaluate((el) => el.textContent))
      )
    }

    const htmlMessages = await Promise.all(
      messages.map((a) => a.evaluate((el) => el.innerHTML))
    )

    return htmlMessages.map((messageHtml) => {
      // parse markdown from message HTML
      // A string pattern only replaces the first match, so use a global regex
      // to strip the "Copy code" label from every code block in the message.
      const cleanedHtml = messageHtml.replace(
        /Copy code<\/button>/g,
        '</button>'
      )

      return html2md(cleanedHtml, {
        ignoreTags: [
          'button',
          'svg',
          'style',
          'form',
          'noscript',
          'script',
          'meta',
          'head'
        ],
        skipTags: ['button', 'svg']
      })
    })
  }

  /**
   * Types `message` into the chat input, submits it with Enter and polls once
   * per second until the last complete message differs from the one seen
   * before sending.
   *
   * NOTE: there is no timeout; this keeps polling until a new message shows up.
   *
   * @returns the new last message.
   * @throws Error('not signed in') if the chat input box cannot be found.
   */
  async sendMessage(message: string): Promise<string> {
    const inputBox = await this._getInputBox()
    if (!inputBox) throw new Error('not signed in')

    const lastMessage = await this.getLastMessage()

    await inputBox.click()
    await inputBox.type(message, { delay: 0 })
    await inputBox.press('Enter')

    do {
      await delay(1000)

      // TODO: this logic needs some work because we can have repeat messages...
      const newLastMessage = await this.getLastMessage()
      if (
        newLastMessage &&
        lastMessage?.toLowerCase() !== newLastMessage?.toLowerCase()
      ) {
        return newLastMessage
      }
    } while (true)
  }

  /**
   * Clicks the first link in the page's `nav` element to reset the thread.
   *
   * @throws Error('not signed in') if that link cannot be found.
   */
  async resetThread() {
    const resetButton = await this._page.$('nav > a:nth-child(1)')
    if (!resetButton) throw new Error('not signed in')

    await resetButton.click()
  }

  /**
   * Closes the browser, if one is open, and clears the page/browser handles.
   * Safe to call before `init()` or more than once.
   */
  async close() {
    const browser = this._browser
    this._page = null
    this._browser = null

    if (browser) {
      await browser.close()
    }
  }

  /** Looks up the chat input element (the first `textarea` on the page). */
  protected async _getInputBox() {
    // [data-id="root"]
    return this._page.$('textarea')
  }
}
......@@ -95,6 +95,7 @@ export class ChatGPTAPI {
'user-agent': this._userAgent,
'x-openai-assistant-app-id': '',
'accept-language': 'en-US,en;q=0.9',
'accept-encoding': 'gzip, deflate, br',
origin: 'https://chat.openai.com',
referer: 'https://chat.openai.com/chat',
'sec-ch-ua':
......@@ -299,6 +300,45 @@ export class ChatGPTAPI {
}
}
/**
 * POSTs `input` to the backend `/moderations` endpoint using the
 * `text-moderation-playground` model.
 *
 * @param input - text to submit for moderation
 * @returns the parsed JSON response body
 * @throws types.ChatGPTError carrying the response, status code and status
 * text when the HTTP response is not ok.
 */
async sendModeration(input: string) {
  const token = await this.refreshAccessToken()
  const endpoint = `${this._backendApiBaseUrl}/moderations`

  // Later keys override any same-named entries coming from `this._headers`.
  const requestHeaders = {
    ...this._headers,
    Authorization: `Bearer ${token}`,
    Accept: '*/*',
    'Content-Type': 'application/json',
    Cookie: `cf_clearance=${this._clearanceToken}`
  }

  const payload: types.ModerationsJSONBody = {
    input,
    model: 'text-moderation-playground'
  }

  if (this._debug) {
    console.log('POST', endpoint, requestHeaders, payload)
  }

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: requestHeaders,
    body: JSON.stringify(payload)
  })

  if (response.ok) {
    return (await response.json()) as types.ModerationsJSONResult
  }

  // Non-2xx: surface the HTTP details on the thrown error.
  const failure = new types.ChatGPTError(
    `${response.status} ${response.statusText}`
  )
  failure.response = response
  failure.statusCode = response.status
  failure.statusText = response.statusText
  throw failure
}
/**
* @returns `true` if the client has a valid access token or `false` if refreshing
* the token fails.
......
export * from './chatgpt-api'
export * from './chatgpt-api-browser'
export * from './chatgpt-conversation'
export * from './types'
export * from './utils'
......
......@@ -10,12 +10,15 @@ import {
type PuppeteerLaunchOptions
} from 'puppeteer'
import puppeteer from 'puppeteer-extra'
import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha'
import StealthPlugin from 'puppeteer-extra-plugin-stealth'
import * as types from './types'
puppeteer.use(StealthPlugin())
let hasRecaptchaPlugin = false
/**
* Represents everything that's required to pass into `ChatGPTAPI` in order
* to authenticate with the unofficial ChatGPT API.
......@@ -46,47 +49,64 @@ export async function getOpenAIAuth({
email,
password,
browser,
page,
timeoutMs = 2 * 60 * 1000,
isGoogleLogin = false
// TODO: temporary for testing...
// timeoutMs = 60 * 60 * 1000,
isGoogleLogin = false,
captchaToken = process.env.CAPTCHA_TOKEN
}: {
email?: string
password?: string
browser?: Browser
page?: Page
timeoutMs?: number
isGoogleLogin?: boolean
captchaToken?: string
}): Promise<OpenAIAuth> {
let page: Page
let origBrowser = browser
const origBrowser = browser
const origPage = page
try {
if (!browser) {
browser = await getBrowser()
browser = await getBrowser({ captchaToken })
}
const userAgent = await browser.userAgent()
page = (await browser.pages())[0] || (await browser.newPage())
page.setDefaultTimeout(timeoutMs)
if (!page) {
page = (await browser.pages())[0] || (await browser.newPage())
page.setDefaultTimeout(timeoutMs)
}
await page.goto('https://chat.openai.com/auth/login')
await page.goto('https://chat.openai.com/auth/login', {
waitUntil: 'networkidle0'
})
// NOTE: this is where you may encounter a CAPTCHA
await checkForChatGPTAtCapacity(page)
if (hasRecaptchaPlugin) {
await page.solveRecaptchas()
}
await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs })
await checkForChatGPTAtCapacity(page)
// once we get to this point, the Cloudflare cookies are available
await delay(1000)
// once we get to this point, the Cloudflare cookies should be available
// login as well (optional)
if (email && password) {
await page.waitForSelector('#__next .btn-primary', { timeout: timeoutMs })
await delay(500)
await Promise.all([
// click login button
page.click('#__next .btn-primary'),
page.waitForNavigation({
waitUntil: 'networkidle0'
})
])
let submitP: Promise<void>
await checkForChatGPTAtCapacity(page)
let submitP: () => Promise<void>
if (isGoogleLogin) {
await page.click('button[data-provider="google"]')
......@@ -98,19 +118,25 @@ export async function getOpenAIAuth({
])
await page.waitForSelector('input[type="password"]', { visible: true })
await page.type('input[type="password"]', password, { delay: 10 })
submitP = page.keyboard.press('Enter')
submitP = () => page.keyboard.press('Enter')
} else {
await page.waitForSelector('#username')
await page.type('#username', email, { delay: 10 })
await page.type('#username', email, { delay: 20 })
await delay(100)
if (hasRecaptchaPlugin) {
console.log('solveRecaptchas()')
const res = await page.solveRecaptchas()
console.log('solveRecaptchas result', res)
}
await page.click('button[type="submit"]')
await page.waitForSelector('#password')
await page.type('#password', password, { delay: 10 })
submitP = page.click('button[type="submit"]')
submitP = () => page.click('button[type="submit"]')
}
await Promise.all([
submitP,
new Promise<void>((resolve, reject) => {
let resolved = false
......@@ -151,7 +177,9 @@ export async function getOpenAIAuth({
})
setTimeout(waitForCapacityText, 500)
})
}),
submitP()
])
}
......@@ -170,11 +198,10 @@ export async function getOpenAIAuth({
return authInfo
} catch (err) {
console.error(err)
throw err
} finally {
if (origBrowser) {
if (page) {
if (page && page !== origPage) {
await page.close()
}
} else if (browser) {
......@@ -191,7 +218,28 @@ export async function getOpenAIAuth({
* able to use the built-in `puppeteer` version of Chromium because Cloudflare
* recognizes it and blocks access.
*/
export async function getBrowser(launchOptions?: PuppeteerLaunchOptions) {
export async function getBrowser(
opts: PuppeteerLaunchOptions & {
captchaToken?: string
} = {}
) {
const { captchaToken = process.env.CAPTCHA_TOKEN, ...launchOptions } = opts
if (captchaToken && !hasRecaptchaPlugin) {
hasRecaptchaPlugin = true
console.log('use captcha', captchaToken)
puppeteer.use(
RecaptchaPlugin({
provider: {
id: '2captcha',
token: captchaToken
},
visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved)
})
)
}
return puppeteer.launch({
headless: false,
args: ['--no-sandbox', '--exclude-switches', 'enable-automation'],
......@@ -212,16 +260,17 @@ export const defaultChromeExecutablePath = (): string => {
case 'darwin':
return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'
default:
default: {
/**
* Since two (2) separate chrome releases exists on linux
* we first do a check to ensure we're executing the right one.
* Since two (2) separate chrome releases exist on linux, we first do a
* check to ensure we're executing the right one.
*/
const chromeExists = fs.existsSync('/usr/bin/google-chrome')
return chromeExists
? '/usr/bin/google-chrome'
: '/usr/bin/google-chrome-stable'
}
}
}
......@@ -231,6 +280,12 @@ async function checkForChatGPTAtCapacity(page: Page) {
try {
// res = await page.$('[role="alert"]')
res = await page.$x("//div[contains(., 'ChatGPT is at capacity')]")
console.log('capacity', res)
if (!res?.length) {
res = await page.$x("//div[contains(., 'at capacity right now')]")
console.log('capacity2', res)
}
} catch (err) {
// ignore errors likely due to navigation
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment