diff --git a/models/memory/database.js b/models/memory/database.js
index 2a4e0ce..700acce 100644
--- a/models/memory/database.js
+++ b/models/memory/database.js
@@ -114,6 +114,31 @@ function resetSimpleState (overrides = {}) {
   }
 }
 
+/**
+ * Strip everything except letters, digits and whitespace from free-form query
+ * text before it is bound to an FTS MATCH parameter.
+ * @param {*} input - raw query text; coerced with String()
+ * @returns {string} the cleaned, single-spaced query, or '' when nothing
+ *   searchable is left
+ */
+function sanitiseRawFtsInput (input) {
+  if (!input) {
+    return ''
+  }
+  const trimmed = String(input).trim()
+  if (!trimmed) {
+    return ''
+  }
+  const replaced = trimmed
+    .replace(/["'`]+/g, ' ')
+    .replace(/\u3000/g, ' ')
+    .replace(/[^\p{L}\p{N}\u4E00-\u9FFF\u3040-\u30FF\uAC00-\uD7AF\u1100-\u11FF\s]+/gu, ' ')
+  const collapsed = replaced.replace(/\s+/g, ' ').trim()
+  // Do not fall back to the raw text when nothing survives: it still contains the
+  // characters stripped above, and callers skip MATCH when they get ''.
+  return collapsed
+}
+
 function isSimpleLibraryFile (filename) {
   return /(^libsimple.*\.(so|dylib|dll)$)|(^simple\.(so|dylib|dll)$)/i.test(filename)
 }
@@ -644,6 +669,24 @@ export function getSimpleExtensionState () {
   return { ...simpleExtensionState }
 }
 
+/**
+ * Prepare query text for an FTS MATCH clause.
+ * With ftsConfig.matchQuery set the text is only trimmed; otherwise it is passed
+ * through sanitiseRawFtsInput.
+ * @param {*} query - raw query text
+ * @param {{ matchQuery?: string }} [ftsConfig] - FTS configuration
+ * @returns {string} text to bind to MATCH, or '' when there is nothing to search for
+ */
+export function sanitiseFtsQueryInput (query, ftsConfig) {
+  if (!query) {
+    return ''
+  }
+  if (ftsConfig?.matchQuery) {
+    return String(query).trim()
+  }
+  return sanitiseRawFtsInput(query)
+}
+
 export function getMemoryDatabase () {
   if (dbInstance) {
     return dbInstance
diff --git a/models/memory/extractor.js b/models/memory/extractor.js
index 1a78777..f00f303 100644
--- a/models/memory/extractor.js
+++ b/models/memory/extractor.js
@@ -1,4 +1,4 @@
-import { SendMessageOption } from 'chaite'
+import { SendMessageOption, Chaite } from 'chaite'
 import ChatGPTConfig from '../../config/config.js'
 import { getClientForModel } from '../chaite/vectorizer.js'
 
@@ -43,9 +43,48 @@ function formatEntry (entry) {
   return str.length > limit ? str.slice(0, limit) + '…' : str
 }
 
-function resolveGroupExtractionPrompts () {
+/**
+ * Load the chat preset configured for memory extraction and copy its send options.
+ * Returns null when no id is given; an uninitialised Chaite, a missing preset
+ * manager, an unknown preset or a thrown error are logged and also yield null.
+ * @param {string} presetId - id of the preset to load
+ * @param {string} scope - label used in the log messages
+ * @returns {Promise<{ preset: object, sendMessageOption: object } | null>}
+ */
+async function resolvePresetSendMessageOption (presetId, scope) {
+  if (!presetId) {
+    return null
+  }
+  try {
+    const chaite = Chaite.getInstance?.()
+    if (!chaite) {
+      logger.warn(`[Memory] ${scope} extraction preset ${presetId} configured but Chaite is not initialized`)
+      return null
+    }
+    const presetManager = chaite.getChatPresetManager?.()
+    if (!presetManager) {
+      logger.warn(`[Memory] ${scope} extraction preset ${presetId} configured but preset manager unavailable`)
+      return null
+    }
+    const preset = await presetManager.getInstance(presetId)
+    if (!preset) {
+      logger.warn(`[Memory] ${scope} extraction preset ${presetId} not found`)
+      return null
+    }
+    logger.debug(`[Memory] using ${scope} extraction preset ${presetId}`)
+    return {
+      preset,
+      sendMessageOption: JSON.parse(JSON.stringify(preset.sendMessageOption || {}))
+    }
+  } catch (err) {
+    logger.error(`[Memory] failed to load ${scope} extraction preset ${presetId}:`, err)
+    return null
+  }
+}
+
+function resolveGroupExtractionPrompts (presetSendMessageOption) {
   const config = ChatGPTConfig.memory?.group || {}
-  const system = config.extractionSystemPrompt || `You are a knowledge extraction assistant that specialises in summarising long-term facts from group chat transcripts.
+  const system = config.extractionSystemPrompt || presetSendMessageOption?.systemOverride || `You are a knowledge extraction assistant that specialises in summarising long-term facts from group chat transcripts.
 Read the provided conversation and identify statements that should be stored as long-term knowledge for the group.
 Return a JSON array. Each element must contain:
 {
@@ -79,9 +118,9 @@ function buildExistingMemorySection (existingMemories = []) {
   return `以下是关于用户的已知长期记忆,请在提取新记忆时参考,避免重复已有事实,并在信息变更时更新描述:\n${lines.join('\n')}`
 }
 
-function resolveUserExtractionPrompts (existingMemories = []) {
+function resolveUserExtractionPrompts (existingMemories = [], presetSendMessageOption) {
   const config = ChatGPTConfig.memory?.user || {}
-  const systemTemplate = config.extractionSystemPrompt || `You are an assistant that extracts long-term personal preferences or persona details about a user.
+  const systemTemplate = config.extractionSystemPrompt || presetSendMessageOption?.systemOverride || `You are an assistant that extracts long-term personal preferences or persona details about a user.
 Given a conversation snippet between the user and the bot, identify durable information such as preferences, nicknames, roles, speaking style, habits, or other facts that remain valid over time.
 Return a JSON array of **strings**, and nothing else, without any other characters including \`\`\` or \`\`\`json. Each string must be a short sentence (in the same language as the conversation) describing one piece of long-term memory. Do not include keys, JSON objects, or additional metadata. Ignore temporary topics or uncertain information.`
   const userTemplate = config.extractionUserPrompt || `下面是用户与机器人的对话,请根据系统提示提取可长期记忆的个人信息。
@@ -103,8 +142,16 @@ function buildUserPrompt (messages, template) {
   return template.replace('${messages}', body)
 }
 
-async function callModel ({ prompt, systemPrompt, model, maxToken = 4096, temperature = 0.2 }) {
-  const { client } = await getClientForModel(model)
+async function callModel ({ prompt, systemPrompt, model, maxToken = 4096, temperature = 0.2, sendMessageOption }) {
+  const options = sendMessageOption
+    ? JSON.parse(JSON.stringify(sendMessageOption))
+    : {}
+  options.model = model || options.model
+  if (!options.model) {
+    throw new Error('No model available for memory extraction call')
+  }
+  const resolvedModel = options.model
+  const { client } = await getClientForModel(resolvedModel)
   const response = await client.sendMessage({
     role: 'user',
     content: [
@@ -114,10 +161,11 @@ async function callModel ({ prompt, systemPrompt, model, maxToken = 4096, temper
       }
     ]
   }, SendMessageOption.create({
-    model,
-    // temperature,
-    maxToken,
-    systemOverride: systemPrompt,
+    ...options,
+    model: options.model,
+    temperature: options.temperature ?? temperature,
+    maxToken: options.maxToken ?? maxToken,
+    systemOverride: systemPrompt ?? options.systemOverride,
     disableHistoryRead: true,
     disableHistorySave: true,
     stream: false
@@ -125,44 +173,54 @@ async function callModel ({ prompt, systemPrompt, model, maxToken = 4096, temper
   return collectTextFromResponse(response)
 }
 
-function resolveGroupExtractionModel () {
+function resolveGroupExtractionModel (presetSendMessageOption) {
   const config = ChatGPTConfig.memory?.group
   if (config?.extractionModel) {
     return config.extractionModel
   }
+  if (presetSendMessageOption?.model) {
+    return presetSendMessageOption.model
+  }
   if (ChatGPTConfig.llm?.defaultModel) {
     return ChatGPTConfig.llm.defaultModel
   }
-  return ChatGPTConfig.llm?.embeddingModel || ''
+  return ''
 }
 
-function resolveUserExtractionModel () {
+function resolveUserExtractionModel (presetSendMessageOption) {
   const config = ChatGPTConfig.memory?.user
   if (config?.extractionModel) {
     return config.extractionModel
   }
+  if (presetSendMessageOption?.model) {
+    return presetSendMessageOption.model
+  }
   if (ChatGPTConfig.llm?.defaultModel) {
     return ChatGPTConfig.llm.defaultModel
   }
-  return ChatGPTConfig.llm?.embeddingModel || ''
+  return ''
 }
 
 export async function extractGroupFacts (messages) {
   if (!messages || messages.length === 0) {
     return []
   }
-  const model = resolveGroupExtractionModel()
+  const groupConfig = ChatGPTConfig.memory?.group || {}
+  const presetInfo = await resolvePresetSendMessageOption(groupConfig.extractionPresetId, 'group')
+  const presetOptions = presetInfo?.sendMessageOption
+  const model = resolveGroupExtractionModel(presetOptions)
   if (!model) {
     logger.warn('No model configured for group memory extraction')
     return []
   }
   try {
-    const prompts = resolveGroupExtractionPrompts()
-    logger.debug(`[Memory] start group fact extraction, messages=${messages.length}, model=${model}`)
+    const prompts = resolveGroupExtractionPrompts(presetOptions)
+    logger.debug(`[Memory] start group fact extraction, messages=${messages.length}, model=${model}${presetInfo?.preset ? `, preset=${presetInfo.preset.id}` : ''}`)
     const text = await callModel({
       prompt: buildGroupUserPrompt(messages, prompts.userTemplate),
       systemPrompt: prompts.system,
-      model
+      model,
+      sendMessageOption: presetOptions
     })
     const parsed = parseJSON(text)
     if (Array.isArray(parsed)) {
@@ -184,18 +242,22 @@ export async function extractUserMemories (messages, existingMemories = []) {
   if (!messages || messages.length === 0) {
     return []
   }
-  const model = resolveUserExtractionModel()
+  const userConfig = ChatGPTConfig.memory?.user || {}
+  const presetInfo = await resolvePresetSendMessageOption(userConfig.extractionPresetId, 'user')
+  const presetOptions = presetInfo?.sendMessageOption
+  const model = resolveUserExtractionModel(presetOptions)
   if (!model) {
     logger.warn('No model configured for user memory extraction')
     return []
   }
   try {
-    const prompts = resolveUserExtractionPrompts(existingMemories)
-    logger.debug(`[Memory] start user memory extraction, snippets=${messages.length}, existing=${existingMemories.length}, model=${model}`)
+    const prompts = resolveUserExtractionPrompts(existingMemories, presetOptions)
+    logger.debug(`[Memory] start user memory extraction, snippets=${messages.length}, existing=${existingMemories.length}, model=${model}${presetInfo?.preset ? `, preset=${presetInfo.preset.id}` : ''}`)
     const text = await callModel({
       prompt: buildUserPrompt(messages, prompts.userTemplate),
       systemPrompt: prompts.system,
-      model
+      model,
+      sendMessageOption: presetOptions
     })
     const parsed = parseJSON(text)
     if (Array.isArray(parsed)) {
diff --git a/models/memory/groupMemoryStore.js b/models/memory/groupMemoryStore.js
index 1aa05a5..42b68d7 100644
--- a/models/memory/groupMemoryStore.js
+++ b/models/memory/groupMemoryStore.js
@@ -1,4 +1,4 @@
-import { getMemoryDatabase, getVectorDimension, getGroupMemoryFtsConfig, resetVectorTableDimension } from './database.js'
+import { getMemoryDatabase, getVectorDimension, getGroupMemoryFtsConfig, resetVectorTableDimension, sanitiseFtsQueryInput } from './database.js'
 import ChatGPTConfig from '../../config/config.js'
 import { embedTexts } from '../chaite/vectorizer.js'
 
@@ -342,37 +342,42 @@ export class GroupMemoryStore {
     if (!queryText || !queryText.trim()) {
       return []
     }
-    const trimmedQuery = queryText.trim()
+    const originalQuery = queryText.trim()
     const ftsConfig = getGroupMemoryFtsConfig()
-    const matchExpression = ftsConfig.matchQuery ? `${ftsConfig.matchQuery}(?)` : '?'
+    const matchQueryParam = sanitiseFtsQueryInput(originalQuery, ftsConfig)
     const results = []
     const seen = new Set()
-    try {
-      const rows = this.db.prepare(`
-        SELECT gf.*, bm25(group_facts_fts) AS bm25_score
-        FROM group_facts_fts
-        JOIN group_facts gf ON gf.id = group_facts_fts.rowid
-        WHERE gf.group_id = ?
-          AND group_facts_fts MATCH ${matchExpression}
-        ORDER BY bm25_score ASC
-        LIMIT ?
-      `).all(groupId, trimmedQuery, limit)
-      for (const row of rows) {
-        const bm25Threshold = this.bm25Threshold
-        if (bm25Threshold) {
-          const score = Number(row?.bm25_score)
-          if (!Number.isFinite(score) || score > bm25Threshold) {
-            continue
+    if (matchQueryParam) {
+      const matchExpression = ftsConfig.matchQuery ? `${ftsConfig.matchQuery}(?)` : '?'
+      try {
+        const rows = this.db.prepare(`
+          SELECT gf.*, bm25(group_facts_fts) AS bm25_score
+          FROM group_facts_fts
+          JOIN group_facts gf ON gf.id = group_facts_fts.rowid
+          WHERE gf.group_id = ?
+            AND group_facts_fts MATCH ${matchExpression}
+          ORDER BY bm25_score ASC
+          LIMIT ?
+        `).all(groupId, matchQueryParam, limit)
+        for (const row of rows) {
+          const bm25Threshold = this.bm25Threshold
+          if (bm25Threshold) {
+            const score = Number(row?.bm25_score)
+            if (!Number.isFinite(score) || score > bm25Threshold) {
+              continue
+            }
+            row.bm25_score = score
+          }
+          if (row && !seen.has(row.id)) {
+            results.push(row)
+            seen.add(row.id)
           }
-          row.bm25_score = score
-        }
-        if (row && !seen.has(row.id)) {
-          results.push(row)
-          seen.add(row.id)
         }
+      } catch (err) {
+        logger.warn('Text search failed for group memory:', err)
       }
-    } catch (err) {
-      logger.warn('Text search failed for group memory:', err)
+    } else {
+      logger.debug('[Memory] group memory text search skipped MATCH due to empty query after sanitisation')
     }
 
     if (results.length < limit) {
@@ -384,7 +389,7 @@ export class GroupMemoryStore {
           AND instr(fact, ?) > 0
         ORDER BY importance DESC, created_at DESC
         LIMIT ?
-      `).all(groupId, trimmedQuery, Math.max(limit * 2, limit))
+      `).all(groupId, originalQuery, Math.max(limit * 2, limit))
       for (const row of likeRows) {
         if (row && !seen.has(row.id)) {
           results.push(row)
diff --git a/models/memory/userMemoryStore.js b/models/memory/userMemoryStore.js
index 8a0419b..9511bae 100644
--- a/models/memory/userMemoryStore.js
+++ b/models/memory/userMemoryStore.js
@@ -1,4 +1,4 @@
-import { getMemoryDatabase, getUserMemoryFtsConfig } from './database.js'
+import { getMemoryDatabase, getUserMemoryFtsConfig, sanitiseFtsQueryInput } from './database.js'
 import { md5 } from '../../utils/common.js'
 
 function normaliseId (value) {
@@ -213,42 +213,46 @@ export class UserMemoryStore {
     const normUserId = normaliseId(userId)
     const normGroupId = normaliseId(groupId)
     const filteredExclude = (excludeIds || []).filter(Boolean)
-    const trimmedQuery = queryText.trim()
+    const originalQuery = queryText.trim()
     const ftsConfig = getUserMemoryFtsConfig()
-    const matchExpression = ftsConfig.matchQuery ? `${ftsConfig.matchQuery}(?)` : '?'
-    const params = [normUserId]
-    let query = `
-      SELECT um.*, bm25(user_memory_fts) AS bm25_score
-      FROM user_memory_fts
-      JOIN user_memory um ON um.id = user_memory_fts.rowid
-      WHERE um.user_id = ?
-        AND user_memory_fts MATCH ${matchExpression}
-    `
-    params.push(trimmedQuery)
-    if (normGroupId) {
-      query += ' AND (um.group_id = ? OR um.group_id IS NULL)'
-      params.push(normGroupId)
-    }
-    if (filteredExclude.length) {
-      query += ` AND um.id NOT IN (${filteredExclude.map(() => '?').join(',')})`
-      params.push(...filteredExclude)
-    }
-    query += `
-      ORDER BY bm25_score ASC, um.updated_at DESC
-      LIMIT ?
-    `
-    params.push(limit)
+    const matchQueryParam = sanitiseFtsQueryInput(originalQuery, ftsConfig)
     const results = []
     const seen = new Set(filteredExclude)
-    try {
-      const ftsRows = this.db.prepare(query).all(...params)
-      appendRows(results, ftsRows, seen)
-    } catch (err) {
-      logger?.warn?.('User memory text search failed:', err)
+    if (matchQueryParam) {
+      const matchExpression = ftsConfig.matchQuery ? `${ftsConfig.matchQuery}(?)` : '?'
+      const params = [normUserId, matchQueryParam]
+      let query = `
+        SELECT um.*, bm25(user_memory_fts) AS bm25_score
+        FROM user_memory_fts
+        JOIN user_memory um ON um.id = user_memory_fts.rowid
+        WHERE um.user_id = ?
+          AND user_memory_fts MATCH ${matchExpression}
+      `
+      if (normGroupId) {
+        query += ' AND (um.group_id = ? OR um.group_id IS NULL)'
+        params.push(normGroupId)
+      }
+      if (filteredExclude.length) {
+        query += ` AND um.id NOT IN (${filteredExclude.map(() => '?').join(',')})`
+        params.push(...filteredExclude)
+      }
+      query += `
+        ORDER BY bm25_score ASC, um.updated_at DESC
+        LIMIT ?
+      `
+      params.push(limit)
+      try {
+        const ftsRows = this.db.prepare(query).all(...params)
+        appendRows(results, ftsRows, seen)
+      } catch (err) {
+        logger?.warn?.('User memory text search failed:', err)
+      }
+    } else {
+      logger?.debug?.('[Memory] user memory text search skipped MATCH due to empty query after sanitisation')
     }
 
     if (results.length < limit) {
-      const likeParams = [normUserId, trimmedQuery]
+      const likeParams = [normUserId, originalQuery]
       let likeQuery = `
         SELECT um.*
         FROM user_memory um