mirror of
https://github.com/ikechan8370/chatgpt-plugin.git
synced 2025-12-17 22:07:10 +00:00
fix: website工具用浏览器
This commit is contained in:
parent
cd13b829e7
commit
ed337275d5
1 changed file with 31 additions and 6 deletions
|
|
@ -4,6 +4,7 @@ import { Config } from '../config.js'
|
|||
import fetch from 'node-fetch'
|
||||
import proxy from 'https-proxy-agent'
|
||||
import { getMaxModelTokens } from '../common.js'
|
||||
import { ChatGPTPuppeteer } from '../browser.js'
|
||||
export class WebsiteTool extends AbstractTool {
|
||||
name = 'website'
|
||||
|
||||
|
|
@ -20,12 +21,36 @@ export class WebsiteTool extends AbstractTool {
|
|||
func = async function (opts) {
|
||||
let { url } = opts
|
||||
try {
|
||||
let res = await fetch(url, {
|
||||
headers: {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
|
||||
}
|
||||
// let res = await fetch(url, {
|
||||
// headers: {
|
||||
// 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
|
||||
// }
|
||||
// })
|
||||
// let text = await res.text()
|
||||
let origin = false
|
||||
if (!Config.headless) {
|
||||
Config.headless = true
|
||||
origin = true
|
||||
}
|
||||
let ppt = new ChatGPTPuppeteer()
|
||||
let browser = await ppt.getBrowser()
|
||||
let page = await browser.newPage()
|
||||
await page.goto(url, {
|
||||
waitUntil: 'networkidle2'
|
||||
})
|
||||
let text = await res.text()
|
||||
let text = await page.content()
|
||||
await page.close()
|
||||
if (origin) {
|
||||
Config.headless = false
|
||||
}
|
||||
// text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
|
||||
// .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
|
||||
// .replace(/<head\b[^<]*(?:(?!<\/head>)<[^<]*)*<\/head>/gi, '')
|
||||
// .replace(/<!--[\s\S]*?-->/gi, '')
|
||||
text = text.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '') // 移除<style>标签及其内容
|
||||
.replace(/<[^>]+style\s*=\s*(["'])(?:(?!\1).)*\1[^>]*>/gi, '') // 移除带有style属性的标签
|
||||
.replace(/<[^>]+>/g, '')
|
||||
|
||||
let maxModelTokens = getMaxModelTokens(Config.model)
|
||||
text = text.slice(0, Math.min(text.length, maxModelTokens - 1600))
|
||||
let api = new ChatGPTAPI({
|
||||
|
|
@ -49,7 +74,7 @@ export class WebsiteTool extends AbstractTool {
|
|||
},
|
||||
maxModelTokens
|
||||
})
|
||||
const htmlContentSummaryRes = await api.sendMessage(`这是一个网页html的内容,请你从中提取出其中的主体内容告诉我。${text}`)
|
||||
const htmlContentSummaryRes = await api.sendMessage(`这是一个网页html经过筛选的内容,请你进一步去掉其中的标签、样式、script等无用信息,并从中提取出其中的主体内容转换成自然语言告诉我,不需要主观描述性的语言。${text}`)
|
||||
let htmlContentSummary = htmlContentSummaryRes.text
|
||||
return `this is the main content of website:\n ${htmlContentSummary}`
|
||||
} catch (err) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue