fix: memory cursor and extract prompt

This commit is contained in:
ikechan8370 2025-11-05 15:02:52 +08:00
parent 660de1b462
commit 0550e6d492
7 changed files with 257 additions and 48 deletions

View file

@ -5,6 +5,7 @@ import common from '../../../lib/common/common.js'
import { getGroupContextPrompt } from '../utils/group.js'
import { formatTimeToBeiJing } from '../utils/common.js'
import { extractTextFromUserMessage, processUserMemory } from '../models/memory/userMemoryManager.js'
import { buildMemoryPrompt } from '../models/memory/prompt.js'
export class bym extends plugin {
constructor () {
@ -100,9 +101,29 @@ export class bym extends plugin {
this.reply(forwardElement)
}
}
const systemSegments = []
if (sendMessageOption.systemOverride) {
systemSegments.push(sendMessageOption.systemOverride)
}
if (userText) {
const memoryPrompt = await buildMemoryPrompt({
userId: e.sender.user_id + '',
groupId: e.isGroup ? e.group_id + '' : null,
queryText: userText
})
if (memoryPrompt) {
systemSegments.push(memoryPrompt)
logger.debug(`[Memory] bym memory prompt: ${memoryPrompt}`)
}
}
if (ChatGPTConfig.llm.enableGroupContext && e.isGroup) {
const contextPrompt = await getGroupContextPrompt(e, ChatGPTConfig.llm.groupContextLength)
sendMessageOption.systemOverride = sendMessageOption.systemOverride ? sendMessageOption.systemOverride + '\n' + contextPrompt : contextPrompt
if (contextPrompt) {
systemSegments.push(contextPrompt)
}
}
if (systemSegments.length > 0) {
sendMessageOption.systemOverride = systemSegments.join('\n\n')
}
// 发送
const response = await Chaite.getInstance().sendMessage(userMessage, e, {

View file

@ -203,6 +203,8 @@ class ChatGPTConfig {
* promptHeader: string,
* promptItemTemplate: string,
* promptFooter: string,
* extractionSystemPrompt: string,
* extractionUserPrompt: string,
* vectorMaxDistance: number,
* textMaxBm25Score: number,
* maxFactsPerInjection: number,
@ -219,7 +221,9 @@ class ChatGPTConfig {
* minImportanceForInjection: number,
* promptHeader: string,
* promptItemTemplate: string,
* promptFooter: string
* promptFooter: string,
* extractionSystemPrompt: string,
* extractionUserPrompt: string
* },
* extensions: {
* simple: {
@ -245,9 +249,24 @@ class ChatGPTConfig {
hybridPrefer: 'vector-first',
historyPollInterval: 300,
historyBatchSize: 120,
promptHeader: '# 以下是一些该群聊中可能相关的事实,你可以参考,但你只能将其作为你的默认长期知识与记忆,但不要主动透露这些事实。',
promptHeader: '# 以下是一些该群聊中可能相关的事实,你可以参考,但不要主动透露这些事实。',
promptItemTemplate: '- ${fact}${topicSuffix}',
promptFooter: '',
extractionSystemPrompt: `You are a knowledge extraction assistant that specialises in summarising long-term facts from group chat transcripts.
Read the provided conversation and identify statements that should be stored as long-term knowledge for the group.
Return a JSON array. Each element must contain:
{
"fact": 事实内容必须完整包含事件的各个要素而不能是简单的短语比如谁参与了事件做了什么事情背景时间是什么同一件事情尽可能整合为同一条而非拆分以便利于检索,
"topic": 主题关键词字符串 "活动""成员信息",
"importance": 一个介于0和1之间的小数数值越大表示越重要,
"source_message_ids": 原始消息ID数组,
"source_messages": 对应原始消息的简要摘录或合并文本,
"involved_users": 出现或相关的用户ID数组
}
Only include meaningful, verifiable group-specific information that is useful for future conversations. Do not record incomplete information. Do not include general knowledge or unrelated facts. Do not wrap the JSON array in code fences.`,
extractionUserPrompt: `以下是群聊中的一些消息请根据系统说明提取值得长期记忆的事实以JSON数组形式返回不要输出额外说明。
\${messages}`,
vectorMaxDistance: 0,
textMaxBm25Score: 0,
maxFactsPerInjection: 5,
@ -264,7 +283,13 @@ class ChatGPTConfig {
minImportanceForInjection: 0,
promptHeader: '# 用户画像',
promptItemTemplate: '- ${value}',
promptFooter: ''
promptFooter: '',
extractionSystemPrompt: `You are an assistant that extracts long-term personal preferences or persona details about a user.
Given a conversation snippet between the user and the bot, identify durable information such as preferences, nicknames, roles, speaking style, habits, or other facts that remain valid over time.
Return a JSON array of **strings**, and nothing else, without any other characters including \`\`\` or \`\`\`json. Each string must be a short sentence (in the same language as the conversation) describing one piece of long-term memory. Do not include keys, JSON objects, or additional metadata. Ignore temporary topics or uncertain information.`,
extractionUserPrompt: `下面是用户与机器人的对话,请根据系统提示提取可长期记忆的个人信息。
\${messages}`
},
extensions: {
simple: {

View file

@ -4,6 +4,7 @@ import { extractGroupFacts } from './extractor.js'
import { memoryService } from './service.js'
import { getBotFramework } from '../../utils/bot.js'
import { ICQQGroupContextCollector, TRSSGroupContextCollector } from '../../utils/group.js'
import { groupHistoryCursorStore } from './groupHistoryCursorStore.js'
const DEFAULT_MAX_WINDOW = 300 // seconds
const DEFAULT_HISTORY_BATCH = 120
@ -356,10 +357,10 @@ export class GroupMessageCollector {
timestamp: timestampMs || Date.now()
}
const messageKey = this.resolveMessageKey(payload, messageId, timestampMs)
if (this.shouldSkipMessage(state, timestampMs, messageKey)) {
if (this.shouldSkipMessage(state, timestampMs, messageKey, payload.message_id)) {
return false
}
this.updateGroupState(state, timestampMs, messageKey)
this.updateGroupState(groupId, state, timestampMs, messageKey, payload.message_id)
buffer.messages.push(payload)
logger.debug(`[Memory] buffered group message, group=${groupId}, buffer=${buffer.messages.length}`)
this.tryTriggerFlush(groupId, buffer)
@ -393,19 +394,29 @@ export class GroupMessageCollector {
getGroupState (groupId) {
let state = this.groupStates.get(groupId)
if (!state) {
const cursor = groupHistoryCursorStore.getCursor(groupId)
const lastTimestamp = Number(cursor?.last_timestamp) || 0
const lastMessageId = cursor?.last_message_id || null
state = {
lastTimestamp: 0,
lastTimestamp,
lastMessageId,
recentIds: new Set()
}
if (lastMessageId) {
state.recentIds.add(lastMessageId)
}
this.groupStates.set(groupId, state)
}
return state
}
shouldSkipMessage (state, timestampMs, messageKey) {
shouldSkipMessage (state, timestampMs, messageKey, messageId) {
if (!state) {
return false
}
if (messageId && state.lastMessageId && messageId === state.lastMessageId) {
return true
}
if (timestampMs && timestampMs < state.lastTimestamp) {
return true
}
@ -418,8 +429,9 @@ export class GroupMessageCollector {
return false
}
updateGroupState (state, timestampMs, messageKey) {
if (!timestampMs) {
updateGroupState (groupId, state, timestampMs, messageKey, messageId) {
const hasTimestamp = Number.isFinite(timestampMs) && timestampMs > 0
if (!hasTimestamp) {
if (messageKey) {
state.recentIds.add(messageKey)
if (state.recentIds.size > MAX_RECENT_IDS) {
@ -427,20 +439,35 @@ export class GroupMessageCollector {
state.recentIds = new Set(ids)
}
}
if (messageId) {
state.lastMessageId = String(messageId)
groupHistoryCursorStore.updateCursor(groupId, {
lastMessageId: state.lastMessageId,
lastTimestamp: state.lastTimestamp || null
})
}
return
}
if (timestampMs > state.lastTimestamp) {
state.lastTimestamp = timestampMs
state.recentIds = messageKey ? new Set([messageKey]) : new Set()
return
}
if (timestampMs === state.lastTimestamp && messageKey) {
} else if (timestampMs === state.lastTimestamp && messageKey) {
state.recentIds.add(messageKey)
if (state.recentIds.size > MAX_RECENT_IDS) {
const ids = Array.from(state.recentIds).slice(-MAX_RECENT_IDS)
state.recentIds = new Set(ids)
}
}
if (messageId) {
state.lastMessageId = String(messageId)
}
groupHistoryCursorStore.updateCursor(groupId, {
lastMessageId: state.lastMessageId || null,
lastTimestamp: state.lastTimestamp || timestampMs
})
}
getBuffer (groupId) {

View file

@ -5,6 +5,7 @@ import path from 'path'
import ChatGPTConfig from '../../config/config.js'
const META_VECTOR_DIM_KEY = 'group_vec_dimension'
const META_VECTOR_MODEL_KEY = 'group_vec_model'
const META_GROUP_TOKENIZER_KEY = 'group_memory_tokenizer'
const META_USER_TOKENIZER_KEY = 'user_memory_tokenizer'
const TOKENIZER_DEFAULT = 'unicode61'
@ -14,6 +15,7 @@ const PLUGIN_ROOT = path.resolve('./plugins/chatgpt-plugin')
let dbInstance = null
let cachedVectorDimension = null
let cachedVectorModel = null
let userMemoryFtsConfig = {
tokenizer: TOKENIZER_DEFAULT,
matchQuery: null
@ -329,6 +331,17 @@ function ensureGroupFactsTable (db) {
ensureGroupFactsFtsTable(db)
}
// Creates the per-group history-cursor table (idempotent, CREATE IF NOT EXISTS).
// One row per group: the id and timestamp of the last message already folded
// into long-term memory, so collection can resume after a restart without
// re-processing the same history.
function ensureGroupHistoryCursorTable (db) {
// Meta table is ensured first to match the other ensure*Table helpers in this file.
ensureMetaTable(db)
db.exec(`
CREATE TABLE IF NOT EXISTS group_history_cursor (
group_id TEXT PRIMARY KEY,
last_message_id TEXT,
last_timestamp INTEGER
)
`)
}
function ensureUserMemoryTable (db) {
ensureMetaTable(db)
db.exec(`
@ -550,6 +563,9 @@ function ensureUserMemoryFtsTable (db) {
}
/**
 * Create the sqlite-vec virtual table that stores group-fact embeddings.
 *
 * @param {object} db - better-sqlite3 style handle (must expose exec()).
 * @param {number} dimension - embedding vector length; must be a positive
 *   integer because it is interpolated into the `float[<dimension>]` column
 *   declaration, where a fractional or non-numeric value is invalid SQL.
 * @throws {Error} when dimension is missing, NaN, fractional, or <= 0.
 */
function createVectorTable (db, dimension) {
  // Number.isInteger also rejects NaN and fractional values such as 3.5,
  // which the previous truthiness check would have let through.
  if (!Number.isInteger(dimension) || dimension <= 0) {
    throw new Error(`Invalid vector dimension for table creation: ${dimension}`)
  }
  db.exec(`CREATE VIRTUAL TABLE vec_group_facts USING vec0(embedding float[${dimension}])`)
}
@ -559,10 +575,24 @@ function ensureVectorTable (db) {
return cachedVectorDimension
}
const stored = getMetaValue(db, META_VECTOR_DIM_KEY)
const storedModel = getMetaValue(db, META_VECTOR_MODEL_KEY)
const currentModel = ChatGPTConfig.llm?.embeddingModel || ''
if (!storedModel || storedModel !== currentModel) {
try {
db.exec('DROP TABLE IF EXISTS vec_group_facts')
} catch (err) {
logger?.warn?.('[Memory] failed to drop vec_group_facts during model change:', err)
}
setMetaValue(db, META_VECTOR_MODEL_KEY, currentModel)
setMetaValue(db, META_VECTOR_DIM_KEY, '0')
cachedVectorDimension = 0
cachedVectorModel = currentModel
return cachedVectorDimension
}
let dimension = stored ? parseInt(stored, 10) : null
if (!dimension || Number.isNaN(dimension) || dimension <= 0) {
dimension = resolvePreferredDimension()
setMetaValue(db, META_VECTOR_DIM_KEY, dimension.toString())
cachedVectorDimension = 0
return cachedVectorDimension
}
const exists = db.prepare(`
SELECT name FROM sqlite_master
@ -572,6 +602,7 @@ function ensureVectorTable (db) {
createVectorTable(db, dimension)
}
cachedVectorDimension = dimension
cachedVectorModel = currentModel
return dimension
}
@ -588,11 +619,15 @@ export function resetVectorTableDimension (dimension) {
}
createVectorTable(db, dimension)
setMetaValue(db, META_VECTOR_DIM_KEY, dimension.toString())
const model = ChatGPTConfig.llm?.embeddingModel || ''
setMetaValue(db, META_VECTOR_MODEL_KEY, model)
cachedVectorDimension = dimension
cachedVectorModel = model
}
function migrate (db) {
ensureGroupFactsTable(db)
ensureGroupHistoryCursorTable(db)
ensureUserMemoryTable(db)
ensureVectorTable(db)
}

View file

@ -43,10 +43,10 @@ function formatEntry (entry) {
return str.length > limit ? str.slice(0, limit) + '…' : str
}
function buildGroupSystemPrompt () {
return `You are a knowledge extraction assistant that specialises in summarising long-term facts from chat transcripts.
Read the provided group conversation and identify statements that should be stored as long-term knowledge for the group.
Note that you should only record valuable information, as this will help the LLM assistant use it as objective facts to answer questions in the future.
function resolveGroupExtractionPrompts () {
const config = ChatGPTConfig.memory?.group || {}
const system = config.extractionSystemPrompt || `You are a knowledge extraction assistant that specialises in summarising long-term facts from group chat transcripts.
Read the provided conversation and identify statements that should be stored as long-term knowledge for the group.
Return a JSON array. Each element must contain:
{
"fact": 事实内容必须完整包含事件的各个要素而不能是简单的短语比如谁参与了事件做了什么事情背景时间是什么同一件事情尽可能整合为同一条而非拆分以便利于检索,
@ -56,17 +56,19 @@ Return a JSON array. Each element must contain:
"source_messages": 对应原始消息的简要摘录或合并文本,
"involved_users": 出现或相关的用户ID数组
}
Only include meaningful, verifiable information about group members that is useful for future conversations. Do not record incomplete information. Do not record other common sense which is not specified for the group`
Only include meaningful, verifiable group-specific information that is useful for future conversations. Do not record incomplete information. Do not include general knowledge or unrelated facts. Do not wrap the JSON array in code fences.`
const userTemplate = config.extractionUserPrompt || `以下是群聊中的一些消息请根据系统说明提取值得长期记忆的事实以JSON数组形式返回不要输出额外说明。
${'{messages}'}`
return { system, userTemplate }
}
function buildGroupUserPrompt (messages) {
function buildGroupUserPrompt (messages, template) {
const joined = messages.map(msg => {
const sender = msg.nickname || msg.user_id || '未知用户'
return `${sender}: ${msg.text}`
}).join('\n')
return `以下是群聊中的一些消息请根据系统说明提取值得长期记忆的事实以JSON数组形式返回不要输出额外说明。
${joined}`
return template.replace('${messages}', joined)
}
function buildExistingMemorySection (existingMemories = []) {
@ -77,22 +79,28 @@ function buildExistingMemorySection (existingMemories = []) {
return `以下是关于用户的已知长期记忆,请在提取新记忆时参考,避免重复已有事实,并在信息变更时更新描述:\n${lines.join('\n')}`
}
function buildUserSystemPrompt (existingMemories = []) {
return `You are an assistant that extracts long-term personal preferences or persona details about a user.
function resolveUserExtractionPrompts (existingMemories = []) {
const config = ChatGPTConfig.memory?.user || {}
const systemTemplate = config.extractionSystemPrompt || `You are an assistant that extracts long-term personal preferences or persona details about a user.
Given a conversation snippet between the user and the bot, identify durable information such as preferences, nicknames, roles, speaking style, habits, or other facts that remain valid over time.
Return a JSON array of **strings**, and nothing else. The full response must be a json array!!! Each string must be a short sentence (in the same language as the conversation) describing one piece of long-term memory. Do not include embedded JSON objects, or additional metadata. Ignore temporary topics or uncertain information.
Return a JSON array of **strings**, and nothing else, without any other characters including \`\`\` or \`\`\`json. Each string must be a short sentence (in the same language as the conversation) describing one piece of long-term memory. Do not include keys, JSON objects, or additional metadata. Ignore temporary topics or uncertain information.`
const userTemplate = config.extractionUserPrompt || `下面是用户与机器人的对话,请根据系统提示提取可长期记忆的个人信息。
${buildExistingMemorySection(existingMemories)}`
${'{messages}'}`
return {
system: `${systemTemplate}
${buildExistingMemorySection(existingMemories)}`,
userTemplate
}
}
function buildUserPrompt (messages) {
function buildUserPrompt (messages, template) {
const body = messages.map(msg => {
const prefix = msg.role === 'assistant' ? '机器人' : (msg.nickname || msg.user_id || '用户')
return `${prefix}: ${msg.text}`
}).join('\n')
return `下面是用户与机器人的对话,请根据系统提示提取可长期记忆的个人信息。
${body}`
return template.replace('${messages}', body)
}
async function callModel ({ prompt, systemPrompt, model, maxToken = 4096, temperature = 0.2 }) {
@ -149,10 +157,11 @@ export async function extractGroupFacts (messages) {
return []
}
try {
const prompts = resolveGroupExtractionPrompts()
logger.debug(`[Memory] start group fact extraction, messages=${messages.length}, model=${model}`)
const text = await callModel({
prompt: buildGroupUserPrompt(messages),
systemPrompt: buildGroupSystemPrompt(),
prompt: buildGroupUserPrompt(messages, prompts.userTemplate),
systemPrompt: prompts.system,
model
})
const parsed = parseJSON(text)
@ -181,12 +190,11 @@ export async function extractUserMemories (messages, existingMemories = []) {
return []
}
try {
const prompts = resolveUserExtractionPrompts(existingMemories)
logger.debug(`[Memory] start user memory extraction, snippets=${messages.length}, existing=${existingMemories.length}, model=${model}`)
// logger.debug(`[Memory] memories prompt: ${buildUserPrompt(messages)}`)
// logger.debug(`[Memory] system prompt: ${buildUserSystemPrompt(existingMemories)}`)
const text = await callModel({
prompt: buildUserPrompt(messages),
systemPrompt: buildUserSystemPrompt(existingMemories),
prompt: buildUserPrompt(messages, prompts.userTemplate),
systemPrompt: prompts.system,
model
})
const parsed = parseJSON(text)

View file

@ -0,0 +1,61 @@
import { getMemoryDatabase } from './database.js'
/**
 * Normalise an arbitrary group identifier into a trimmed, non-empty string.
 * Accepts strings, numbers, or anything String() can render.
 *
 * @param {*} groupId - raw identifier; null/undefined are treated as absent.
 * @returns {string|null} trimmed id, or null when input is absent or blank.
 */
function normaliseGroupId (groupId) {
  // Loose == null matches both null and undefined in a single check.
  if (groupId == null) {
    return null
  }
  const trimmed = String(groupId).trim()
  return trimmed.length > 0 ? trimmed : null
}
/**
 * Persists a per-group "history cursor" (last processed message id and
 * timestamp) in the shared memory SQLite database, so group message
 * collection can resume after a restart without double-processing history.
 */
export class GroupHistoryCursorStore {
constructor (db = getMemoryDatabase()) {
this.resetDatabase(db)
}
// (Re)binds the database handle and prepares the two statements this store
// uses. Also invoked from ensureDb() when the cached connection was closed.
resetDatabase (db = getMemoryDatabase()) {
this.db = db
this.selectStmt = this.db.prepare(`
SELECT last_message_id, last_timestamp
FROM group_history_cursor
WHERE group_id = ?
`)
// Upsert keyed on group_id. Note both columns are fully overwritten on
// conflict — a null value in the update replaces any previously stored value.
this.upsertStmt = this.db.prepare(`
INSERT INTO group_history_cursor (group_id, last_message_id, last_timestamp)
VALUES (@group_id, @last_message_id, @last_timestamp)
ON CONFLICT(group_id) DO UPDATE SET
last_message_id = excluded.last_message_id,
last_timestamp = excluded.last_timestamp
`)
}
// Lazily refreshes the connection when it was never set or has been closed.
// Assumes the handle exposes `open === false` once closed (better-sqlite3
// convention) — TODO confirm against getMemoryDatabase()'s return type.
ensureDb () {
if (!this.db || this.db.open === false) {
logger?.debug?.('[Memory] refreshing group history cursor database connection')
this.resetDatabase()
}
return this.db
}
/**
 * Fetch the stored cursor for a group.
 * @param {string|number|null} groupId - raw id; blank/nullish ids yield null.
 * @returns {{last_message_id: string|null, last_timestamp: number|null}|null}
 */
getCursor (groupId) {
const gid = normaliseGroupId(groupId)
if (!gid) return null
this.ensureDb()
return this.selectStmt.get(gid) || null
}
/**
 * Upsert the cursor row for a group.
 * NOTE(review): a missing or non-finite lastTimestamp is stored as NULL and
 * will overwrite any previously stored timestamp — callers are expected to
 * re-supply the current timestamp when only the message id changed.
 * @param {string|number|null} groupId - raw group id.
 * @param {{lastMessageId?: *, lastTimestamp?: number}} [cursor] - new values.
 * @returns {boolean} false when groupId normalises to null, true otherwise.
 */
updateCursor (groupId, { lastMessageId = null, lastTimestamp = null } = {}) {
const gid = normaliseGroupId(groupId)
if (!gid) return false
this.ensureDb()
const payload = {
group_id: gid,
// Coerce to string so numeric message ids round-trip consistently.
last_message_id: lastMessageId ? String(lastMessageId) : null,
// Floor to keep the INTEGER column exact; reject NaN/Infinity/non-numbers.
last_timestamp: (typeof lastTimestamp === 'number' && Number.isFinite(lastTimestamp)) ? Math.floor(lastTimestamp) : null
}
this.upsertStmt.run(payload)
return true
}
}
export const groupHistoryCursorStore = new GroupHistoryCursorStore()

View file

@ -77,11 +77,21 @@ export class GroupMemoryStore {
ELSE group_facts.created_at
END
`)
this.deleteVecStmt = this.db.prepare('DELETE FROM vec_group_facts WHERE rowid = ?')
this.insertVecStmt = this.db.prepare('INSERT INTO vec_group_facts(rowid, embedding) VALUES (?, ?)')
this.prepareVectorStatements()
this.loadFactByIdStmt = this.db.prepare('SELECT * FROM group_facts WHERE id = ?')
}
// Prepares the delete/insert statements against the vec_group_facts virtual
// table. The table is created lazily (only once the embedding dimension is
// known), so prepare() may throw here; in that case both statements are reset
// to null and preparation is retried later before any vector upsert.
prepareVectorStatements () {
try {
this.deleteVecStmt = this.db.prepare('DELETE FROM vec_group_facts WHERE rowid = ?')
this.insertVecStmt = this.db.prepare('INSERT INTO vec_group_facts(rowid, embedding) VALUES (?, ?)')
} catch (err) {
// Intentional best-effort: a missing table is an expected state here, so the
// error is only debug-logged rather than propagated. `err` itself is unused.
this.deleteVecStmt = null
this.insertVecStmt = null
logger?.debug?.('[Memory] vector table not ready, postpone statement preparation')
}
}
ensureDb () {
if (!this.db || this.db.open === false) {
logger?.debug?.('[Memory] refreshing group memory database connection')
@ -158,11 +168,14 @@ export class GroupMemoryStore {
}
let vectors = []
let dimension = null
let tableDimension = getVectorDimension() || 0
const configuredDimension = Number(ChatGPTConfig.llm?.dimensions || 0)
if (this.isVectorEnabled()) {
try {
dimension = getVectorDimension()
vectors = await embedTexts(filteredFacts.map(f => f.fact), this.embeddingModel, dimension)
const preferredDimension = configuredDimension > 0
? configuredDimension
: (tableDimension > 0 ? tableDimension : undefined)
vectors = await embedTexts(filteredFacts.map(f => f.fact), this.embeddingModel, preferredDimension)
vectors = vectors.map(normaliseEmbeddingVector)
const mismatchVector = vectors.find(vec => {
if (!vec) return false
@ -173,18 +186,26 @@ export class GroupMemoryStore {
return false
})
const actualDimension = mismatchVector ? mismatchVector.length : 0
if (actualDimension && actualDimension !== dimension) {
const expectedDimension = dimension ?? 'unknown'
if (actualDimension && actualDimension !== tableDimension) {
const expectedDimension = tableDimension || preferredDimension || configuredDimension || 'unknown'
logger.warn(`[Memory] embedding dimension mismatch, expected=${expectedDimension}, actual=${actualDimension}. Recreating vector table.`)
try {
resetVectorTableDimension(actualDimension)
this.deleteVecStmt = this.db.prepare('DELETE FROM vec_group_facts WHERE rowid = ?')
this.insertVecStmt = this.db.prepare('INSERT INTO vec_group_facts(rowid, embedding) VALUES (?, ?)')
dimension = actualDimension
this.prepareVectorStatements()
tableDimension = actualDimension
} catch (resetErr) {
logger.error('Failed to reset vector table dimension:', resetErr)
vectors = []
}
} else if (actualDimension && tableDimension <= 0) {
try {
resetVectorTableDimension(actualDimension)
this.prepareVectorStatements()
tableDimension = actualDimension
} catch (resetErr) {
logger.error('Failed to initialise vector table dimension:', resetErr)
vectors = []
}
}
} catch (err) {
logger.error('Failed to embed group facts:', err)
@ -214,6 +235,13 @@ export class GroupMemoryStore {
continue
}
if (Array.isArray(vectorList) && vectorList[i]) {
if (!this.deleteVecStmt || !this.insertVecStmt) {
this.prepareVectorStatements()
}
if (!this.deleteVecStmt || !this.insertVecStmt) {
logger.warn('[Memory] vector table unavailable, skip vector upsert')
continue
}
try {
const vector = normaliseEmbeddingVector(vectorList[i])
if (!vector) {
@ -280,6 +308,10 @@ export class GroupMemoryStore {
}
try {
const dimension = getVectorDimension()
if (!dimension || dimension <= 0) {
logger.debug('[Memory] vector search skipped: vector dimension unavailable')
return []
}
const [embedding] = await embedTexts([queryText], this.embeddingModel, dimension)
if (!embedding) {
return []