From 06b26ab5809b384bf1effb807e4963a2d3225f8d Mon Sep 17 00:00:00 2001
From: 127Wzc <52766844+127Wzc@users.noreply.github.com>
Date: Mon, 24 Apr 2023 23:58:36 +0800
Subject: [PATCH] =?UTF-8?q?fix:=E4=BF=AE=E5=A4=8D=E5=AF=B9=E7=BE=A4?=
 =?UTF-8?q?=E5=8F=8B@=E6=97=B6=20=E6=98=B5=E7=A7=B0=E4=B8=AD=E5=90=AB?=
 =?UTF-8?q?=E7=A9=BA=E6=A0=BC=20=E5=AF=BC=E8=87=B4=E4=B8=80=E9=83=A8?=
 =?UTF-8?q?=E5=88=86=E6=98=B5=E7=A7=B0=E8=A2=AB=E7=BB=9F=E8=AE=A1=20(#384)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

优化:
1.字典在导入nodejieba时初始化一次即可
修复:
1.通过qq消息对象中message对象的type类型直接过滤出文本内容
---
 utils/wordcloud/tokenizer.js | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/utils/wordcloud/tokenizer.js b/utils/wordcloud/tokenizer.js
index 5b4d4c7..ae373a8 100644
--- a/utils/wordcloud/tokenizer.js
+++ b/utils/wordcloud/tokenizer.js
@@ -4,6 +4,7 @@ import fs from 'fs'
 let nodejieba
 try {
   nodejieba = (await import('@node-rs/jieba')).default
+  nodejieba.load()
 } catch (err) {
   logger.info('未安装@node-rs/jieba,娱乐功能-词云统计不可用')
 }
@@ -61,23 +62,17 @@ export class Tokenizer {
     }
     let chats = await this.getTodayHistory(groupId)
     logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
-    try {
-      nodejieba.load()
-    } catch (err) {
-      // ignore already load error
-    }
+
     const _path = process.cwd()
     let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
     const data = fs.readFileSync(stopWordsPath)
     const stopWords = String(data)?.split('\n') || []
     let chatContent = chats
-      .map(c => c.raw_message
-        .replaceAll('[图片]', '')
-        .replaceAll('[表情]', '')
-        .replaceAll('[动画表情]', '')
-        .replaceAll('[语音]', '')
-        .replaceAll(/@\S+\s?/g, '')
-        .trim()
-      )
+      .map(c => c.message
+        //只统计文本内容
+        .filter(item => item.type == 'text')
+        .map(textItem => `${textItem.text}`)
+        .join("").trim()
+      )
       .map(c => {
         let length = c.length