mirror of
https://github.com/ikechan8370/chatgpt-plugin.git
synced 2025-12-16 13:27:08 +00:00
fix:修复对群友@时 昵称中含空格 导致一部分昵称被统计 (#384)
优化: 1.字典在导入nodejieba时初始化一次即可 修复: 1.通过qq消息对象中message对象的type类型直接过滤出文本内容
This commit is contained in:
parent
9e4049181a
commit
06b26ab580
1 changed file with 7 additions and 12 deletions
|
|
@ -4,6 +4,7 @@ import fs from 'fs'
|
|||
// Optional dependency: the word-cloud tokenizer needs @node-rs/jieba.
// `nodejieba` stays undefined when the package cannot be imported.
let nodejieba
try {
  nodejieba = (await import('@node-rs/jieba')).default
} catch (err) {
  logger.info('未安装@node-rs/jieba,娱乐功能-词云统计不可用')
}
if (nodejieba) {
  try {
    // Initialise the dictionary once, when this module is evaluated.
    nodejieba.load()
  } catch (err) {
    // Deliberately ignored: load() can throw when the dictionary has already
    // been loaded. That is not an installation problem, so it must not be
    // reported with the "not installed" message above.
  }
}
|
||||
|
|
@ -61,23 +62,17 @@ export class Tokenizer {
|
|||
}
|
||||
let chats = await this.getTodayHistory(groupId)
|
||||
logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
|
||||
try {
|
||||
nodejieba.load()
|
||||
} catch (err) {
|
||||
// ignore already load error
|
||||
}
|
||||
|
||||
const _path = process.cwd()
|
||||
let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
|
||||
const data = fs.readFileSync(stopWordsPath)
|
||||
const stopWords = String(data)?.split('\n') || []
|
||||
let chatContent = chats
|
||||
.map(c => c.raw_message
|
||||
.replaceAll('[图片]', '')
|
||||
.replaceAll('[表情]', '')
|
||||
.replaceAll('[动画表情]', '')
|
||||
.replaceAll('[语音]', '')
|
||||
.replaceAll(/@\S+\s?/g, '')
|
||||
.trim()
|
||||
.map(c => c.message
|
||||
//只统计文本内容
|
||||
.filter(item => item.type == 'text')
|
||||
.map(textItem => `${textItem.text}`)
|
||||
.join("").trim()
|
||||
)
|
||||
.map(c => {
|
||||
let length = c.length
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue