chatgpt-plugin/utils/alibaba/qwen-api.ts
2025-02-03 00:07:26 +08:00

405 lines
12 KiB
TypeScript

// @ts-ignore
import Keyv from 'keyv'
// @ts-ignore
import pTimeout from 'p-timeout'
// @ts-ignore
import QuickLRU from 'quick-lru'
// @ts-ignore
import {v4 as uuidv4} from 'uuid'
import * as tokenizer from './tokenizer'
import * as types from './types'
// @ts-ignore
import globalFetch from 'node-fetch'
import {qwen, Role} from "./types";
import {openai} from "../openai/types";
// Default value for the `model` field of every request built by QwenApi;
// callers can override it through `completionParams`.
const CHATGPT_MODEL = 'qwen-turbo' // qwen-plus
// Speaker label placed before user turns in the plain-text transcript that
// QwenApi._buildMessages assembles for token counting.
const USER_LABEL_DEFAULT = 'User'
// Speaker label placed before every non-system, non-user turn in that same
// transcript (the literal is presumably Qwen's Chinese product name).
const ASSISTANT_LABEL_DEFAULT = '通义千问'
export class QwenApi {
  protected _apiKey: string
  protected _apiBaseUrl: string
  protected _debug: boolean
  protected _systemMessage: string
  /** Instance-wide request defaults. Treated as immutable: every call merges into a fresh copy. */
  protected _completionParams: Omit<
    types.qwen.CreateChatCompletionRequest,
    'messages' | 'n'
  >
  /** Token budget for the whole prompt (system message + history + new message). */
  protected _maxModelTokens: number
  /** Upper bound on the tokens reserved for the model's answer. */
  protected _maxResponseTokens: number
  protected _fetch: types.FetchFn
  protected _getMessageById: types.GetMessageByIdFunction
  protected _upsertMessage: types.UpsertMessageFunction
  protected _messageStore: Keyv<types.ChatMessage>

  /**
   * Creates a new client wrapper around Qwen's chat completion API, mimicking
   * the official ChatGPT webapp's functionality as closely as possible.
   *
   * @param opts - Client options. `apiKey` is required; `apiBaseUrl`, `debug`,
   * `messageStore`, `completionParams`, `parameters`, `systemMessage`,
   * `getMessageById`, `upsertMessage` and `fetch` are optional.
   * @throws Error if `apiKey` is missing or `fetch` is not a function.
   */
  constructor(opts: types.QWenAPIOptions) {
    const {
      apiKey,
      apiBaseUrl = 'https://dashscope.aliyuncs.com/api/v1',
      debug = false,
      messageStore,
      completionParams,
      parameters,
      systemMessage,
      getMessageById,
      upsertMessage,
      fetch = globalFetch
    } = opts

    // Fail fast on unusable configuration before any state is built.
    if (!apiKey) {
      throw new Error('Qwen missing required apiKey')
    }
    if (!fetch) {
      throw new Error('Invalid environment; fetch is not defined')
    }
    if (typeof fetch !== 'function') {
      throw new Error('Invalid "fetch" is not a function')
    }

    this._apiKey = apiKey
    this._apiBaseUrl = apiBaseUrl
    this._debug = !!debug
    // @ts-ignore
    this._fetch = fetch

    // Built-in defaults first, then constructor-level overrides on top.
    this._completionParams = {
      model: CHATGPT_MODEL,
      parameters: {
        top_p: 0.5,
        top_k: 50,
        temperature: 1.0,
        seed: 114514,
        enable_search: true,
        result_format: "message",
        incremental_output: false,
        ...parameters
      },
      ...completionParams
    }

    // The options read above carry no token limits, so fixed defaults are used:
    // 32000 is the prompt window this client has always trimmed history to, and
    // 1000 is the response reserve. Without these, maxTokens evaluated to NaN.
    this._maxModelTokens = 32000
    this._maxResponseTokens = 1000

    // Only `undefined` triggers the default; any other falsy value (e.g. an
    // empty string) is kept and results in no system message being sent.
    this._systemMessage = systemMessage
    if (this._systemMessage === undefined) {
      const currentDate = new Date().toISOString().split('T')[0]
      this._systemMessage = `You are Qwen, a large language model trained by Alibaba Cloud. Answer as concisely as possible.\nCurrent date: ${currentDate}`
    }

    this._getMessageById = getMessageById ?? this._defaultGetMessageById
    this._upsertMessage = upsertMessage ?? this._defaultUpsertMessage

    if (messageStore) {
      this._messageStore = messageStore
    } else {
      // In-memory LRU fallback: history is lost when the process exits.
      this._messageStore = new Keyv<types.ChatMessage, any>({
        store: new QuickLRU<string, types.ChatMessage>({maxSize: 10000})
      })
    }
  }

  /**
   * Sends a message to the Qwen text-generation endpoint, waits for the response
   * to resolve, and returns the response.
   *
   * If you want your response to have historical context, you must provide a valid `parentMessageId`.
   *
   * Set `debug: true` in the `QwenApi` constructor to log the full request body
   * and raw response. Per-call `completionParams` (including the nested
   * `parameters` object) are merged over the instance defaults for this call
   * only; the instance defaults are never modified.
   *
   * @param text - The prompt message to send
   * @param opts.parentMessageId - Optional ID of the previous message in the conversation (defaults to `undefined`)
   * @param opts.conversationId - Optional ID of the conversation (defaults to `undefined`)
   * @param opts.messageId - Optional ID of the message to send (defaults to a random UUID)
   * @param opts.systemMessage - Optional override for the chat "system message" which acts as instructions to the model
   * @param opts.timeoutMs - Optional timeout in milliseconds (defaults to no timeout)
   * @param opts.abortSignal - Optional signal used to abort the underlying `fetch` call using an [AbortController](https://developer.mozilla.org/en-US/docs/Web/API/AbortController)
   * @param opts.completionParams - Optional per-call overrides for the request body
   * @param role - Role recorded for the outgoing message (defaults to `'user'`)
   *
   * @returns The assistant's reply. Both the outgoing message and the reply are
   * persisted through `upsertMessage` before the promise resolves.
   * @throws Rejects with `types.ChatGPTError` on a non-OK HTTP status or a
   * response without a message; rejects via `p-timeout` when `timeoutMs` elapses.
   */
  async sendMessage(
    text: string,
    opts: types.SendMessageOptions = {},
    role: Role = 'user',
  ): Promise<types.ChatMessage> {
    const {
      parentMessageId,
      messageId = uuidv4(),
      timeoutMs,
      conversationId
    } = opts
    let {abortSignal} = opts

    // When the caller wants a timeout but supplied no signal, create our own
    // controller so the HTTP request can actually be torn down on timeout.
    let abortController: AbortController | null = null
    if (timeoutMs && !abortSignal) {
      abortController = new AbortController()
      abortSignal = abortController.signal
    }

    const latestQuestion: types.ChatMessage = {
      role,
      id: messageId,
      conversationId,
      parentMessageId,
      text,
    }

    // Merge into a brand-new object. `opts.completionParams` may be undefined,
    // and the instance defaults must not absorb one call's overrides.
    const completionParams = {
      ...this._completionParams,
      ...opts.completionParams,
      parameters: {
        ...this._completionParams.parameters,
        ...opts.completionParams?.parameters
      }
    }

    const {messages, maxTokens, numTokens} = await this._buildMessages(
      text,
      role,
      opts,
      completionParams
    )
    if (this._debug) {
      console.log(`maxTokens: ${maxTokens}, numTokens: ${numTokens}`)
    }

    const result: types.ChatMessage & { conversation: qwen.ChatCompletionRequestMessage[] } = {
      role: 'assistant',
      id: uuidv4(),
      conversationId,
      parentMessageId: messageId,
      text: undefined,
      functionCall: undefined,
      conversation: []
    }

    const body = {...completionParams, input: {messages}}

    const request = async (): Promise<types.ChatMessage> => {
      const url = `${this._apiBaseUrl}/services/aigc/text-generation/generation`
      const headers = {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this._apiKey}`
      }
      const payload = JSON.stringify(body)
      if (this._debug) {
        console.log(`sendMessage (${numTokens} tokens)`, payload)
      }

      const res = await this._fetch(url, {
        method: 'POST',
        headers,
        body: payload,
        signal: abortSignal
      })
      if (!res.ok) {
        const reason = await res.text()
        const msg = `Qwen error ${
          res.status || res.statusText
        }: ${reason}`
        const error = new types.ChatGPTError(msg)
        error.statusCode = res.status
        error.statusText = res.statusText
        throw error
      }

      const response: types.qwen.CreateChatCompletionResponse =
        await res.json()
      if (this._debug) {
        console.log(response)
      }

      // Guard against a 200 response that carries no choice, instead of
      // crashing with an opaque "cannot read properties of undefined".
      const reply = response?.output?.choices?.[0]?.message
      if (!reply) {
        throw new types.ChatGPTError(
          `Qwen error: response contained no message: ${JSON.stringify(response)}`
        )
      }

      const toolCalls = reply.tool_calls
      if (toolCalls?.length > 0) {
        // function call result
        result.functionCall = toolCalls[0].function
        result.toolCalls = toolCalls
      }
      if (response.request_id) {
        // Prefer the server-side request id over the locally generated UUID.
        result.id = response.request_id
      }
      result.detail = response
      result.text = reply.content
      result.conversation = messages

      // Persist both sides of the exchange so later calls can rebuild history.
      await Promise.all([
        this._upsertMessage(latestQuestion),
        this._upsertMessage(result)
      ])
      return result
    }

    const responseP = request()
    if (!timeoutMs) {
      return responseP
    }

    if (abortController) {
      // This will be called when a timeout occurs in order for us to forcibly
      // ensure that the underlying HTTP request is aborted.
      const controller = abortController
      ;(responseP as any).cancel = () => {
        controller.abort()
      }
    }
    return pTimeout(responseP, {
      milliseconds: timeoutMs,
      message: 'Qwen timed out waiting for response'
    })
  }

  // @ts-ignore
  get apiKey(): string {
    return this._apiKey
  }

  // @ts-ignore
  set apiKey(apiKey: string) {
    this._apiKey = apiKey
  }

  /**
   * Assembles the message list for a request: the system message (if any), as
   * much conversation history as fits the token budget, and the new message.
   *
   * History is collected by following `parentMessageId` links backwards through
   * `_getMessageById`; each older message is inserted right after the system
   * message so the final list is in chronological order. Walking stops when the
   * chain ends, a parent cannot be found, or adding one more message would
   * exceed the budget.
   *
   * @param text - The new message; when empty, no new message is appended.
   * @param role - Role of the new message.
   * @param opts - Send options; `systemMessage`, `parentMessageId` and `name` are read.
   * @param completionParams - Merged request parameters (currently not consulted).
   * @returns The message list, the estimated prompt token count, and the
   * number of tokens left for the response (at least 1).
   */
  protected async _buildMessages(text: string, role: Role, opts: types.SendMessageOptions, completionParams: Partial<
    Omit<qwen.CreateChatCompletionRequest, 'messages' | 'n' | 'stream'>
  >) {
    const {systemMessage = this._systemMessage} = opts
    let {parentMessageId} = opts
    const userLabel = USER_LABEL_DEFAULT
    const assistantLabel = ASSISTANT_LABEL_DEFAULT
    const maxNumTokens = this._maxModelTokens

    let messages: types.qwen.ChatCompletionRequestMessage[] = []
    if (systemMessage) {
      messages.push({
        role: 'system',
        content: systemMessage
      })
    }
    // Index at which history is spliced in: after the system message, if present.
    const systemMessageOffset = messages.length

    let nextMessages = text
      ? messages.concat([
        {
          role,
          content: text,
          name: role === 'tool' ? opts.name : undefined
        }
      ])
      : messages
    let numTokens = 0

    do {
      // Render the candidate list as a labelled transcript purely to estimate
      // its token count; this text is never sent to the API.
      const sections: string[] = []
      for (const candidate of nextMessages) {
        switch (candidate.role) {
          case 'system':
            sections.push(`Instructions:\n${candidate.content}`)
            break
          case 'user':
            sections.push(`${userLabel}:\n${candidate.content}`)
            break
          default:
            // Messages without content (e.g. pure tool calls) add no text.
            if (candidate.content) {
              sections.push(`${assistantLabel}:\n${candidate.content}`)
            }
        }
      }
      const prompt = sections.join('\n\n')

      const nextNumTokensEstimate = await this._getTokenCount(prompt)
      const isValidPrompt = nextNumTokensEstimate <= maxNumTokens

      // Over budget: keep the previously accepted list and stop.
      // NOTE(review): if the very first candidate is already over budget, the
      // accepted list contains only the system message — confirm this is intended.
      if (prompt && !isValidPrompt) {
        break
      }

      messages = nextMessages
      numTokens = nextNumTokensEstimate

      if (!isValidPrompt) {
        break
      }
      if (!parentMessageId) {
        break
      }

      const parentMessage = await this._getMessageById(parentMessageId)
      if (!parentMessage) {
        break
      }

      const parentMessageRole = parentMessage.role || 'user'
      // Insert the older message directly after the system message so that
      // history stays in chronological order.
      nextMessages = nextMessages.slice(0, systemMessageOffset).concat([
        {
          role: parentMessageRole,
          content: parentMessage.functionCall ? parentMessage.functionCall.arguments : parentMessage.text,
          name: parentMessage.functionCall ? parentMessage.functionCall.name : undefined,
          tool_calls: parentMessage.toolCalls
        },
        ...nextMessages.slice(systemMessageOffset)
      ])
      parentMessageId = parentMessage.parentMessageId
    } while (true)

    // Whatever the prompt leaves of the model budget, capped at the response
    // reserve and never below 1.
    const maxTokens = Math.max(
      1,
      Math.min(this._maxModelTokens - numTokens, this._maxResponseTokens)
    )
    return {messages, maxTokens, numTokens}
  }

  /**
   * Counts the tokens in `text` using the local tokenizer.
   *
   * @param text - Text to measure; empty or missing text counts as 0.
   * @returns The number of tokens after stripping `<|endoftext|>` markers.
   */
  protected async _getTokenCount(text: string) {
    if (!text) {
      return 0
    }
    // TODO: use a better fix in the tokenizer
    text = text.replace(/<\|endoftext\|>/g, '')
    return tokenizer.encode(text).length
  }

  /**
   * Default history lookup: reads a message from the instance's message store.
   *
   * @param id - ID of the message to load.
   * @returns The stored message, or whatever the store yields for a missing key.
   */
  protected async _defaultGetMessageById(
    id: string
  ): Promise<types.ChatMessage> {
    const res = await this._messageStore.get(id)
    return res
  }

  /**
   * Default persistence: writes a message to the instance's message store,
   * keyed by its `id`.
   *
   * @param message - The message to insert or overwrite.
   */
  protected async _defaultUpsertMessage(
    message: types.ChatMessage
  ): Promise<void> {
    await this._messageStore.set(message.id, message)
  }
}