fix:修复对群友@时 昵称中含空格 导致一部分昵称被统计 (#384)

优化:
1.字典在导入nodejieba时初始化一次即可
修复:
1.通过qq消息对象中message对象的type类型直接过滤出文本内容
This commit is contained in:
127Wzc 2023-04-24 23:58:36 +08:00 committed by GitHub
parent 9e4049181a
commit 06b26ab580
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,6 +4,7 @@ import fs from 'fs'
// Handle to the optional '@node-rs/jieba' tokenizer. It stays undefined when
// the package cannot be imported, so the word-cloud feature can detect that.
let nodejieba
try {
  nodejieba = (await import('@node-rs/jieba')).default
} catch (err) {
  logger.info('未安装@node-rs/jieba娱乐功能-词云统计不可用')
}
if (nodejieba) {
  try {
    // Load the dictionary once when this module is evaluated instead of on
    // every tokenization request.
    nodejieba.load()
  } catch (err) {
    // The package was imported successfully, so reporting it as "not
    // installed" would be misleading. load() can throw (for example when the
    // dictionary has already been loaded); nodejieba stays assigned, exactly
    // as before, and the real cause is logged.
    logger.info(`@node-rs/jieba词典初始化失败: ${err?.message ?? err}`)
  }
}
@ -61,23 +62,17 @@ export class Tokenizer {
}
let chats = await this.getTodayHistory(groupId)
logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
try {
nodejieba.load()
} catch (err) {
// ignore already load error
}
const _path = process.cwd()
let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
const data = fs.readFileSync(stopWordsPath)
const stopWords = String(data)?.split('\n') || []
let chatContent = chats
.map(c => c.raw_message
.replaceAll('[图片]', '')
.replaceAll('[表情]', '')
.replaceAll('[动画表情]', '')
.replaceAll('[语音]', '')
.replaceAll(/@\S+\s?/g, '')
.trim()
.map(c => c.message
//只统计文本内容
.filter(item => item.type == 'text')
.map(textItem => `${textItem.text}`)
.join("").trim()
)
.map(c => {
let length = c.length