mirror of
https://github.com/ikechan8370/chatgpt-plugin.git
synced 2025-12-16 21:37:11 +00:00
feat: 为必应和claude2提供读取文件能力支持
This commit is contained in:
parent
6c171b32a1
commit
1a95c67130
7 changed files with 369 additions and 46 deletions
|
|
@ -41,6 +41,7 @@
|
|||
* 2023-05-29 支持gpt-4 API.必应无需cookie即可对话(Sydney和自定义模式)
|
||||
* 2023-07 支持智能模式,机器人可以实现禁言、群名片/头衔(需给机器人管理员/群主)、分享音乐视频、主动发音频、对接ap,sr和喵喵等插件、联网搜索等,需api模式0613系列模型。智能模式所需的额外api和搜索api分别可以参考[chatgpt-plugin-extras](https://github.com/ikechan8370/chatgpt-plugin-extras) 和 [search-api](https://github.com/ikechan8370/search-api) 自行搭建,其中后者提供了一个公益版本,前者可使用[huggingface](https://huggingface.co/spaces/ikechan8370/cp-extra)部署
|
||||
* 2023-09-10 支持来自claude.ai的claude-2模型
|
||||
* 2023-10-19 支持读取文件(目前适配必应模式和Claude2模式)
|
||||
### 如果觉得这个插件有趣或者对你有帮助,请点一个star吧!
|
||||
|
||||
## 版本要求
|
||||
|
|
|
|||
74
apps/chat.js
74
apps/chat.js
|
|
@ -26,7 +26,15 @@ import {
|
|||
getUserReplySetting,
|
||||
getImageOcrText,
|
||||
getImg,
|
||||
getMaxModelTokens, formatDate, generateAudio, formatDate2, mkdirs, getUin
|
||||
getMaxModelTokens,
|
||||
formatDate,
|
||||
generateAudio,
|
||||
formatDate2,
|
||||
mkdirs,
|
||||
getUin,
|
||||
downloadFile,
|
||||
isPureText,
|
||||
extractContentFromFile
|
||||
} from '../utils/common.js'
|
||||
import { ChatGPTPuppeteer } from '../utils/browser.js'
|
||||
import { KeyvFile } from 'keyv-file'
|
||||
|
|
@ -1626,6 +1634,20 @@ export class chatgpt extends plugin {
|
|||
logger.warn('获取群聊聊天记录失败,本次对话不携带聊天记录', err)
|
||||
}
|
||||
}
|
||||
let toSummaryFileContent
|
||||
try {
|
||||
if (e.source) {
|
||||
let msgs = e.isGroup ? await e.group.getChatHistory(e.source.seq, 1) : await e.friend.getChatHistory(e.source.time, 1)
|
||||
let sourceMsg = msgs[0]
|
||||
let fileMsgElem = sourceMsg.message.find(msg => msg.type === 'file')
|
||||
if (fileMsgElem) {
|
||||
toSummaryFileContent = await extractContentFromFile(fileMsgElem, e)
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('读取文件内容出错, 忽略文件内容', err)
|
||||
}
|
||||
opt.toSummaryFileContent = toSummaryFileContent
|
||||
} else {
|
||||
// 重新创建client,因为token可能换到别的了
|
||||
if (bingToken?.indexOf('=') > -1) {
|
||||
|
|
@ -1893,40 +1915,30 @@ export class chatgpt extends plugin {
|
|||
debug: Config.debug,
|
||||
proxy: Config.proxy
|
||||
})
|
||||
let fileUrl, filename, attachments
|
||||
if (e.source && e.source.message === '[文件]') {
|
||||
if (e.isGroup) {
|
||||
let source = (await e.group.getChatHistory(e.source.seq, 1))[0]
|
||||
let file = source.message.find(m => m.type === 'file')
|
||||
if (file) {
|
||||
filename = file.name
|
||||
fileUrl = await e.group.getFileUrl(file.fid)
|
||||
}
|
||||
} else {
|
||||
let source = (await e.friend.getChatHistory(e.source.time, 1))[0]
|
||||
let file = source.message.find(m => m.type === 'file')
|
||||
if (file) {
|
||||
filename = file.name
|
||||
fileUrl = await e.group.getFileUrl(file.fid)
|
||||
let toSummaryFileContent
|
||||
try {
|
||||
if (e.source) {
|
||||
let msgs = e.isGroup ? await e.group.getChatHistory(e.source.seq, 1) : await e.friend.getChatHistory(e.source.time, 1)
|
||||
let sourceMsg = msgs[0]
|
||||
let fileMsgElem = sourceMsg.message.find(msg => msg.type === 'file')
|
||||
if (fileMsgElem) {
|
||||
toSummaryFileContent = await extractContentFromFile(fileMsgElem, e)
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('读取文件内容出错, 忽略文件内容', err)
|
||||
}
|
||||
if (fileUrl) {
|
||||
logger.info('文件地址:' + fileUrl)
|
||||
mkdirs('data/chatgpt/files')
|
||||
let destinationPath = 'data/chatgpt/files/' + filename
|
||||
const response = await fetch(fileUrl)
|
||||
const fileStream = fs.createWriteStream(destinationPath)
|
||||
await new Promise((resolve, reject) => {
|
||||
response.body.pipe(fileStream)
|
||||
response.body.on('error', (err) => {
|
||||
reject(err)
|
||||
})
|
||||
fileStream.on('finish', () => {
|
||||
resolve()
|
||||
})
|
||||
|
||||
let attachments = []
|
||||
if (toSummaryFileContent?.content) {
|
||||
attachments.push({
|
||||
extracted_content: toSummaryFileContent.content,
|
||||
file_name: toSummaryFileContent.name,
|
||||
file_type: 'pdf',
|
||||
file_size: 200312,
|
||||
totalPages: 20
|
||||
})
|
||||
attachments = [await client.convertDocument(destinationPath, filename)]
|
||||
logger.info(toSummaryFileContent.content)
|
||||
}
|
||||
if (conversationId) {
|
||||
return await client.sendMessage(prompt, conversationId, attachments)
|
||||
|
|
|
|||
|
|
@ -35,14 +35,18 @@
|
|||
"ws": "^8.13.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"xlsx": "^0.18.5",
|
||||
"mammoth": "^1.6.0",
|
||||
"pdfjs-dist": "^3.11.174",
|
||||
"nodejs-pptx": "^1.2.4",
|
||||
"@node-rs/jieba": "^1.6.2",
|
||||
"cycletls": "^1.0.21",
|
||||
"jimp": "^0.22.7",
|
||||
"node-silk": "^0.1.0",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"sharp": "^0.32.3",
|
||||
"cycletls": "^1.0.21"
|
||||
"sharp": "^0.32.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
"ts-node": "^10.9.1",
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import fetch, {
|
|||
import crypto from 'crypto'
|
||||
import WebSocket from 'ws'
|
||||
import { Config, pureSydneyInstruction } from './config.js'
|
||||
import { formatDate, getMasterQQ, isCN, getUserData } from './common.js'
|
||||
import { formatDate, getMasterQQ, isCN, getUserData, limitString } from './common.js'
|
||||
import delay from 'delay'
|
||||
import moment from 'moment'
|
||||
import { getProxy } from './proxy.js'
|
||||
|
|
@ -222,8 +222,8 @@ export default class SydneyAIClient {
|
|||
timeout = Config.defaultTimeoutMs,
|
||||
firstMessageTimeout = Config.sydneyFirstMessageTimeout,
|
||||
groupId, nickname, qq, groupName, chats, botName, masterName,
|
||||
messageType = 'Chat'
|
||||
|
||||
messageType = 'Chat',
|
||||
toSummaryFileContent
|
||||
} = opts
|
||||
// if (messageType === 'Chat') {
|
||||
// logger.warn('该Bing账户token已被限流,降级至使用非搜索模式。本次对话AI将无法使用Bing搜索返回的内容')
|
||||
|
|
@ -372,6 +372,10 @@ export default class SydneyAIClient {
|
|||
let maxConv = Config.maxNumUserMessagesInConversation
|
||||
const currentDate = moment().format('YYYY-MM-DDTHH:mm:ssZ')
|
||||
const imageDate = await this.kblobImage(opts.imageUrl)
|
||||
if (toSummaryFileContent?.content) {
|
||||
// message = `请不要进行搜索,用户的问题是:"${message}"`
|
||||
messageType = 'Chat'
|
||||
}
|
||||
let argument0 = {
|
||||
source: 'cib',
|
||||
optionsSets,
|
||||
|
|
@ -415,10 +419,12 @@ export default class SydneyAIClient {
|
|||
text: message,
|
||||
messageType,
|
||||
userIpAddress: await generateRandomIP(),
|
||||
timestamp: currentDate
|
||||
timestamp: currentDate,
|
||||
privacy: 'Internal'
|
||||
// messageType: 'SearchQuery'
|
||||
},
|
||||
tone: 'Creative',
|
||||
privacy: 'Internal',
|
||||
conversationSignature,
|
||||
participant: {
|
||||
id: clientId
|
||||
|
|
@ -440,7 +446,7 @@ export default class SydneyAIClient {
|
|||
}
|
||||
// simulates document summary function on Edge's Bing sidebar
|
||||
// unknown character limit, at least up to 7k
|
||||
if (groupId) {
|
||||
if (groupId && !toSummaryFileContent?.content) {
|
||||
context += '注意,你现在正在一个qq群里和人聊天,现在问你问题的人是' + `${nickname}(${qq})。`
|
||||
if (Config.enforceMaster && master) {
|
||||
if (qq === master) {
|
||||
|
|
@ -493,6 +499,17 @@ export default class SydneyAIClient {
|
|||
messageType: 'Context',
|
||||
messageId: 'discover-web--page-ping-mriduna-----'
|
||||
})
|
||||
} else if (toSummaryFileContent?.content) {
|
||||
obj.arguments[0].previousMessages.push({
|
||||
author: 'user',
|
||||
description: limitString(toSummaryFileContent?.content, 50000, true),
|
||||
contextType: 'WebPage',
|
||||
messageType: 'Context',
|
||||
sourceName: toSummaryFileContent?.name,
|
||||
sourceUrl: 'file:///C:/Users/turing/Downloads/Documents/' + toSummaryFileContent?.name || 'file.pdf',
|
||||
// locale: 'und',
|
||||
// privacy: 'Internal'
|
||||
})
|
||||
} else {
|
||||
obj.arguments[0].previousMessages.push({
|
||||
author: 'user',
|
||||
|
|
|
|||
71
utils/bilibili/wbi.js
Normal file
71
utils/bilibili/wbi.js
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import md5 from 'md5'
|
||||
import fetch from 'node-fetch'
|
||||
|
||||
// Permutation table: entry i is the index (into imgKey + subKey) of the
// character that becomes position i of the scrambled key.
const mixinKeyEncTab = [
  46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
  33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
  61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
  36, 20, 34, 44, 52
]

/**
 * Scramble the character order of the concatenated imgKey + subKey according
 * to mixinKeyEncTab and keep the first 32 characters of the result.
 *
 * @param {string} orig imgKey followed by subKey; must contain at least as
 *   many characters as the permutation table has entries
 * @returns {string} the 32-character mixin key
 * @throws {TypeError} when orig is not a string or is too short to permute
 */
function getMixinKey (orig) {
  // Indexing past the end of the string yields undefined, which would otherwise
  // be concatenated as the literal text "undefined" and produce a bogus key.
  if (typeof orig !== 'string' || orig.length < mixinKeyEncTab.length) {
    throw new TypeError(`getMixinKey expects a string of at least ${mixinKeyEncTab.length} characters`)
  }
  return mixinKeyEncTab.map((n) => orig[n]).join('').slice(0, 32)
}
|
||||
|
||||
/**
 * Sign request parameters with the wbi scheme.
 *
 * A `wts` field holding the current Unix time in seconds is added, the
 * parameters are sorted by key and URL-encoded into a query string, and
 * `w_rid` (md5 of the query string followed by the mixin key) is appended.
 * The caller's `params` object is left untouched.
 *
 * @param {Object} params request parameters; every value must support toString()
 * @param {string} imgKey img_key part of the wbi keys
 * @param {string} subKey sub_key part of the wbi keys
 * @returns {string} the signed query string, ending in `&w_rid=<signature>`
 */
function encWbi (params, imgKey, subKey) {
  const mixinKey = getMixinKey(imgKey + subKey)
  const chrFilter = /[!'()*]/g
  // Sign a copy: adding wts to the caller's object would leak a stale timestamp
  // into any later reuse of that object.
  const signedParams = { ...params, wts: Math.round(Date.now() / 1000) }
  const query = Object.keys(signedParams)
    .sort()
    .map((key) => {
      // Strip the characters "!'()*" from the value before encoding
      const value = signedParams[key].toString().replace(chrFilter, '')
      return `${encodeURIComponent(key)}=${encodeURIComponent(value)}`
    })
    .join('&')
  const wbiSign = md5(query + mixinKey) // w_rid
  return query + '&w_rid=' + wbiSign
}
|
||||
|
||||
/**
 * Fetch the latest img_key and sub_key from the bilibili nav endpoint.
 *
 * Each key is the file name (without directory and extension) of the
 * corresponding URL in the response's `data.wbi_img` object.
 *
 * @returns {Promise<{img_key: string, sub_key: string}>}
 * @throws {Error} on a non-2xx HTTP status or when the response carries no wbi_img URLs
 */
async function getWbiKeys () {
  const resp = await fetch('https://api.bilibili.com/x/web-interface/nav')
  if (!resp.ok) {
    throw new Error(`failed to fetch wbi keys: http status ${resp.status}`)
  }
  // A fetch Response has no `data` property; the payload must be read via json()
  const jsonContent = await resp.json()
  const wbiImg = jsonContent?.data?.wbi_img
  if (!wbiImg?.img_url || !wbiImg?.sub_url) {
    throw new Error('failed to fetch wbi keys: wbi_img is missing from the nav response')
  }
  const keyFromUrl = (url) => url.slice(url.lastIndexOf('/') + 1, url.lastIndexOf('.'))

  return {
    img_key: keyFromUrl(wbiImg.img_url),
    sub_key: keyFromUrl(wbiImg.sub_url)
  }
}
|
||||
|
||||
// getWbiKeys().then((wbi_keys) => {
|
||||
// const query = encWbi(
|
||||
// {
|
||||
// foo: '114',
|
||||
// bar: '514',
|
||||
// baz: 1919810
|
||||
// },
|
||||
// wbi_keys.img_key,
|
||||
// wbi_keys.sub_key
|
||||
// )
|
||||
// console.log(query)
|
||||
// })
|
||||
|
|
@ -186,6 +186,7 @@ export class ClaudeAIClient {
|
|||
} else if (streamDataRes.status === 408) {
|
||||
throw new Error('claude.ai响应超时,可能是回复文本太多,请调高超时时间重试')
|
||||
} else {
|
||||
logger.error(streamDataRes.status, streamDataRes.body)
|
||||
throw new Error('unknown error')
|
||||
}
|
||||
}
|
||||
|
|
|
|||
233
utils/common.js
233
utils/common.js
|
|
@ -1,5 +1,3 @@
|
|||
// import { remark } from 'remark'
|
||||
// import stripMarkdown from 'strip-markdown'
|
||||
import { exec } from 'child_process'
|
||||
import lodash from 'lodash'
|
||||
import fs from 'node:fs'
|
||||
|
|
@ -15,12 +13,26 @@ import AzureTTS, { supportConfigurations as azureRoleList } from './tts/microsof
|
|||
import { translate } from './translate.js'
|
||||
import uploadRecord from './uploadRecord.js'
|
||||
import Version from './version.js'
|
||||
// export function markdownToText (markdown) {
|
||||
// return remark()
|
||||
// .use(stripMarkdown)
|
||||
// .processSync(markdown ?? '')
|
||||
// .toString()
|
||||
// }
|
||||
import fetch from 'node-fetch'
|
||||
// Optional document parsers (declared under optionalDependencies). Each binding
// stays undefined when its package is not installed; consumers must check
// before use.
// NOTE(review): this file is an ES module, where a bare `require` is not
// defined unless created explicitly, so a CommonJS loader is built here.
const { createRequire } = await import('node:module')
const requireOptional = createRequire(import.meta.url)

/**
 * Load an optional CommonJS dependency.
 *
 * @param {string} name package name to load
 * @returns {*} the package's exports, or undefined when it cannot be loaded
 */
function loadOptionalDependency (name) {
  try {
    return requireOptional(name)
  } catch (err) {
    // A missing optional package is expected and stays silent; anything else
    // (e.g. a package that is installed but fails to initialise) is reported.
    if (err?.code !== 'MODULE_NOT_FOUND') {
      globalThis.logger?.warn?.(`failed to load optional dependency ${name}`, err)
    }
    return undefined
  }
}

let pdfjsLib = loadOptionalDependency('pdfjs-dist')

let mammoth = loadOptionalDependency('mammoth')

let XLSX = loadOptionalDependency('xlsx')

let PPTX = loadOptionalDependency('nodejs-pptx')
|
||||
|
||||
let _puppeteer
|
||||
try {
|
||||
|
|
@ -972,3 +984,208 @@ export function getUserSpeaker (userSetting) {
|
|||
return userSetting.ttsRoleVoiceVox || Config.voicevoxTTSSpeaker
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Download a file over HTTP and write it to disk.
 *
 * @param url link of the file to download
 * @param destPath target path, e.g. received/abc.pdf. An existing file with the same name is overwritten.
 * @param absolute whether destPath is used as-is. Defaults to false, in which case destPath is
 *   joined under <cwd>/data/chatgpt and its parent directory is created first.
 * @returns {Promise<string>} the location the downloaded file was finally stored at
 * @throws {Error} when the response status is not ok, or when reading the body or writing the file fails
 */
export async function downloadFile (url, destPath, absolute = false) {
  const response = await fetch(url)
  if (!response.ok) {
    throw new Error(`download file http error: status: ${response.status}`)
  }
  let dest = destPath
  if (!absolute) {
    dest = path.join(process.cwd(), 'data', 'chatgpt', dest)
    mkdirs(path.dirname(dest))
  }
  const fileStream = fs.createWriteStream(dest)
  await new Promise((resolve, reject) => {
    const fail = (err) => {
      // Release the file handle instead of leaving a half-open stream behind
      fileStream.destroy()
      reject(err)
    }
    response.body.on('error', fail)
    // A failed write (missing directory, permissions, full disk) never emits
    // 'finish'; without this listener the promise would stay pending forever.
    fileStream.on('error', fail)
    fileStream.on('finish', () => resolve())
    response.body.pipe(fileStream)
  })
  logger.info(`File downloaded successfully! URL: ${url}, Destination: ${dest}`)
  return dest
}
|
||||
|
||||
/**
 * Classify a file by its extension (case-insensitive) to decide how its text
 * can be obtained.
 *
 * @param {string} filename file name or path
 * @returns {string|false} 'text' when the extension is in the plain-text list;
 *   the extension without its dot ('docx', 'pptx', 'xlsx', 'pdf', 'epub') when
 *   the file needs additional processing; false for anything else, including a
 *   missing or non-string filename
 */
export function isPureText (filename) {
  // path.extname throws on non-string input; treat that as "not readable"
  if (typeof filename !== 'string') {
    return false
  }
  const ext = path.extname(filename).toLowerCase()

  // File extensions that can be treated as pure text
  const textFileExtensions = ['.txt', '.log', '.md', '.csv', '.html', '.css', '.js', '.json', '.xml', '.py', '.java', '.cpp', '.c', '.rb', '.php', '.sql', '.sh', '.pl', '.r', '.swift', '.go', '.ts', '.htm', '.yaml', '.yml', '.ini', '.properties', '.tsv']

  // File types that require additional processing
  const processingExtensions = ['.docx', '.pptx', '.xlsx', '.pdf', '.epub']

  if (textFileExtensions.includes(ext)) {
    return 'text'
  }
  if (processingExtensions.includes(ext)) {
    // Report which kind of processing is needed by returning the bare extension
    return ext.replace('.', '')
  }
  return false
}
|
||||
|
||||
/**
 * Read the text of a PDF file, page by page.
 *
 * @param {string} filePath path of the PDF on disk
 * @param {number} maxPage upper bound on pages read, to prevent OOM or overloading the LLM
 * @returns {Promise<string>} text of the pages, one page per line
 */
async function readPdfText (filePath, maxPage = 100) {
  const data = new Uint8Array(fs.readFileSync(filePath))
  const pdfDocument = await pdfjsLib.getDocument(data).promise
  const pageCount = Math.min(pdfDocument.numPages, maxPage)
  const pages = []
  for (let pageNum = 1; pageNum <= pageCount; ++pageNum) {
    const page = await pdfDocument.getPage(pageNum)
    const textContent = await page.getTextContent()
    pages.push(textContent.items.map(item => item.str).join(' '))
  }
  // Separate pages so the last word of one page is not glued to the first word of the next
  return pages.join('\n')
}

/**
 * Read the first sheet of an .xlsx workbook as tab-separated rows.
 *
 * @param {string} filePath path of the workbook on disk
 * @returns {string} one line per row, cells separated by tabs
 */
function readXlsxText (filePath) {
  const workbook = XLSX.readFile(filePath)
  // Only the first sheet is read
  const sheet = workbook.Sheets[workbook.SheetNames[0]]
  const rows = XLSX.utils.sheet_to_json(sheet, { header: 1 })
  return rows.map(row => row.join('\t')).join('\n')
}

/**
 * Read the text elements of the slides of a .pptx presentation.
 *
 * @param {string} filePath path of the presentation on disk
 * @param {number} maxSlideNumber upper bound on slides read
 * @returns {Promise<string>} text of the slides, joined by newlines
 */
async function readPptxText (filePath, maxSlideNumber = 60) {
  const pptx = new PPTX.Composer()
  await pptx.load(filePath)
  const slides = []
  for (let slideNumber = 1; slideNumber <= maxSlideNumber; slideNumber++) {
    let slide
    try {
      slide = pptx.getSlide(slideNumber)
    } catch (error) {
      // Slide number out of range: there are no more slides
      break
    }
    const slideContent = []
    slide.elements.forEach(element => {
      if (element.text) {
        slideContent.push(element.text)
      }
    })
    slides.push(slideContent.join('\n'))
  }
  return slides.join('\n')
}

/**
 * Extract text content from a file attached to a chat message.
 *
 * The file is downloaded under received/ (directory parts of the sender-chosen
 * name are dropped) and parsed according to the type reported by isPureText.
 * pdf, docx, xlsx and pptx need their optional parser package; without it the
 * file is not downloaded.
 *
 * @param fileMsgElem MessageElem of type file; its `name` and `fid` are read
 * @param e message event; `isGroup` selects `group` or `friend` for resolving the file URL
 * @returns {Promise<{content?: string, name?: string}>} extracted text and the original file
 *   name, or an empty object when the file type is unsupported, its parser is
 *   unavailable, parsing failed, or a text file is empty
 */
export async function extractContentFromFile (fileMsgElem, e) {
  logger.info('filename: ' + fileMsgElem.name)
  const fileType = isPureText(fileMsgElem.name)
  if (!fileType) {
    return {}
  }
  // Decide whether the file can be parsed before spending a download on it
  const parsers = { pdf: pdfjsLib, docx: mammoth, xlsx: XLSX, pptx: PPTX }
  if (fileType !== 'text' && !(fileType in parsers)) {
    logger.error('not supported file type now')
    return {}
  }
  if (fileType in parsers && !parsers[fileType]) {
    logger.warn(`optional dependency for .${fileType} files is not installed, ignore file content`)
    return {}
  }

  const fileUrl = e.isGroup ? await e.group.getFileUrl(fileMsgElem.fid) : await e.friend.getFileUrl(fileMsgElem.fid)
  // The name is chosen by the sender; keep only its last path segment so a name
  // such as ../../x cannot place the download outside received/
  const filePath = await downloadFile(fileUrl, path.join('received', path.basename(fileMsgElem.name)))

  if (fileType === 'text') {
    const text = String(fs.readFileSync(filePath))
    return text ? { content: text, name: fileMsgElem.name } : {}
  }

  try {
    let content
    switch (fileType) {
      case 'pdf':
        content = await readPdfText(filePath)
        break
      case 'docx':
        content = (await mammoth.extractRawText({ path: filePath })).value
        break
      case 'xlsx':
        content = readXlsxText(filePath)
        break
      case 'pptx':
        content = await readPptxText(filePath)
        break
    }
    return { content, name: fileMsgElem.name }
  } catch (error) {
    logger.error(`Error reading .${fileType} file:`, error)
    return {}
  }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue