fix: 修复对群友@时,昵称中含空格导致一部分昵称文本被误统计进词云的问题 (#384)

优化:
1. 字典在导入 nodejieba 时只需初始化(load)一次,无需每次分词前重复加载
修复:
1. 通过 QQ 消息对象中 message 数组元素的 type 字段直接过滤出纯文本内容,避免对 raw_message 做逐个占位符替换
This commit is contained in:
127Wzc 2023-04-24 23:58:36 +08:00 committed by GitHub
parent 9e4049181a
commit 06b26ab580
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,6 +4,7 @@ import fs from 'fs'
let nodejieba let nodejieba
try { try {
nodejieba = (await import('@node-rs/jieba')).default nodejieba = (await import('@node-rs/jieba')).default
nodejieba.load()
} catch (err) { } catch (err) {
logger.info('未安装@node-rs/jieba娱乐功能-词云统计不可用') logger.info('未安装@node-rs/jieba娱乐功能-词云统计不可用')
} }
@ -61,23 +62,17 @@ export class Tokenizer {
} }
let chats = await this.getTodayHistory(groupId) let chats = await this.getTodayHistory(groupId)
logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`) logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
try {
nodejieba.load()
} catch (err) {
// ignore already load error
}
const _path = process.cwd() const _path = process.cwd()
let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt` let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
const data = fs.readFileSync(stopWordsPath) const data = fs.readFileSync(stopWordsPath)
const stopWords = String(data)?.split('\n') || [] const stopWords = String(data)?.split('\n') || []
let chatContent = chats let chatContent = chats
.map(c => c.raw_message .map(c => c.message
.replaceAll('[图片]', '') //只统计文本内容
.replaceAll('[表情]', '') .filter(item => item.type == 'text')
.replaceAll('[动画表情]', '') .map(textItem => `${textItem.text}`)
.replaceAll('[语音]', '') .join("").trim()
.replaceAll(/@\S+\s?/g, '')
.trim()
) )
.map(c => { .map(c => {
let length = c.length let length = c.length