fix: 修复对群友@时,昵称中含空格导致一部分昵称文本被误统计进词云的问题 (#384)

优化:
1. 字典在导入 nodejieba 时只需初始化(load)一次,无需每次分词前重复加载
修复:
1. 通过 QQ 消息对象中 message 数组元素的 type 字段直接过滤出纯文本内容,避免对 raw_message 做逐个占位符替换
This commit is contained in:
127Wzc 2023-04-24 23:58:36 +08:00 committed by GitHub
parent 9e4049181a
commit 06b26ab580
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,6 +4,7 @@ import fs from 'fs'
let nodejieba let nodejieba
try { try {
nodejieba = (await import('@node-rs/jieba')).default nodejieba = (await import('@node-rs/jieba')).default
nodejieba.load()
} catch (err) { } catch (err) {
logger.info('未安装@node-rs/jieba娱乐功能-词云统计不可用') logger.info('未安装@node-rs/jieba娱乐功能-词云统计不可用')
} }
@ -61,23 +62,17 @@ export class Tokenizer {
} }
let chats = await this.getTodayHistory(groupId) let chats = await this.getTodayHistory(groupId)
logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`) logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
try {
nodejieba.load()
} catch (err) {
// ignore already load error
}
const _path = process.cwd() const _path = process.cwd()
let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt` let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
const data = fs.readFileSync(stopWordsPath) const data = fs.readFileSync(stopWordsPath)
const stopWords = String(data)?.split('\n') || [] const stopWords = String(data)?.split('\n') || []
let chatContent = chats let chatContent = chats
.map(c => c.raw_message .map(c => c.message
.replaceAll('[图片]', '') //只统计文本内容
.replaceAll('[表情]', '') .filter(item => item.type == 'text')
.replaceAll('[动画表情]', '') .map(textItem => `${textItem.text}`)
.replaceAll('[语音]', '') .join("").trim()
.replaceAll(/@\S+\s?/g, '')
.trim()
) )
.map(c => { .map(c => {
let length = c.length let length = c.length