diff --git a/utils/tools/WebsiteTool.js b/utils/tools/WebsiteTool.js index 67bd830..b3dfd55 100644 --- a/utils/tools/WebsiteTool.js +++ b/utils/tools/WebsiteTool.js @@ -4,6 +4,7 @@ import { Config } from '../config.js' import fetch from 'node-fetch' import proxy from 'https-proxy-agent' import { getMaxModelTokens } from '../common.js' +import { ChatGPTPuppeteer } from '../browser.js' export class WebsiteTool extends AbstractTool { name = 'website' @@ -20,12 +21,36 @@ export class WebsiteTool extends AbstractTool { func = async function (opts) { let { url } = opts try { - let res = await fetch(url, { - headers: { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36' - } + // let res = await fetch(url, { + // headers: { + // 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36' + // } + // }) + // let text = await res.text() + let origin = false + if (!Config.headless) { + Config.headless = true + origin = true + } + let ppt = new ChatGPTPuppeteer() + let browser = await ppt.getBrowser() + let page = await browser.newPage() + await page.goto(url, { + waitUntil: 'networkidle2' }) - let text = await res.text() + let text = await page.content() + await page.close() + if (origin) { + Config.headless = false + } + // text = text.replace(/)<[^<]*)*<\/style>/gi, '') + // .replace(/)<[^<]*)*<\/script>/gi, '') + // .replace(/)<[^<]*)*<\/head>/gi, '') + // .replace(//gi, '') + text = text.replace(/)<[^<]*)*<\/style>/gi, '') // 移除