// Mirror of https://github.com/ikechan8370/chatgpt-plugin.git
// Synced 2025-12-16 13:27:08 +00:00
// 112 lines, 3.6 KiB, JavaScript
import { Config } from '../config.js'

// `@node-rs/jieba` is treated as an optional dependency: it is imported
// dynamically so that a failed import leaves `nodejieba` undefined instead of
// breaking the whole module. Consumers must check `nodejieba` before use.
let nodejieba
try {
  nodejieba = (await import('@node-rs/jieba')).default
} catch (err) {
  logger.info('未安装nodejieba,娱乐功能-词云统计不可用')
  if (Config.debug) {
    // Keep the real cause visible: the import can also fail for reasons other
    // than the package being absent.
    logger.info(err)
  }
}

// Bracketed placeholders ([image], [emoji], [animated emoji], [voice]) that
// are stripped from `raw_message` before the text is tokenized.
const NON_TEXT_PLACEHOLDERS = ['[图片]', '[表情]', '[动画表情]', '[语音]']

// Number of messages requested per history page.
const HISTORY_PAGE_SIZE = 20

/**
 * Collects one day of a group's chat history and extracts the most frequent
 * keywords from it.
 */
export class Tokenizer {
  /**
   * Fetch the messages of a group that were sent on the local calendar day
   * containing `date`.
   *
   * History is paged backwards from the newest message until a message older
   * than the start of that day is seen, so asking for a day far in the past
   * walks through all newer history first.
   *
   * @param {number|string} groupId - group identifier passed to `Bot.pickGroup`
   * @param {Date} [date] - any instant within the wanted day; not modified
   * @returns {Promise<object[]>} messages of that day, oldest first
   * @throws {Error} when `groupId` is falsy
   */
  async getTodayHistory (groupId, date = new Date()) {
    if (!groupId) {
      throw new Error('no valid group id')
    }

    // Work on copies: setHours()/setDate() mutate the Date they are called on
    // and the caller's object must stay untouched.
    const dayStart = new Date(date)
    dayStart.setHours(0, 0, 0, 0)
    // Calendar arithmetic instead of "+ 24h" so days with a DST change keep
    // their real length.
    const dayEnd = new Date(dayStart)
    dayEnd.setDate(dayEnd.getDate() + 1)
    const startOfSpecifiedDate = dayStart.getTime()
    const endOfSpecifiedDate = dayEnd.getTime()

    const group = Bot.pickGroup(groupId, true)
    const latestChat = await group.getChatHistory(0, 1)
    if (!Array.isArray(latestChat) || latestChat.length === 0) {
      // Nothing has ever been said in this group (or history is unavailable).
      return []
    }

    const chats = []
    const seenSeqs = new Set()
    let oldestTime = Infinity // message times are in seconds

    // Append a page of messages, skipping those already collected. Each page
    // ends with the message whose seq was requested, so consecutive pages
    // overlap by one message.
    const collect = (page) => {
      let added = 0
      for (const chat of page) {
        if (chat.seq != null) {
          if (seenSeqs.has(chat.seq)) {
            continue
          }
          seenSeqs.add(chat.seq)
        }
        chats.push(chat)
        added++
        if (chat.time < oldestTime) {
          oldestTime = chat.time
        }
      }
      return added
    }

    collect(latestChat)
    let seq = latestChat[0].seq

    // Keep paging while everything seen so far is not older than the wanted day.
    while (Number.isFinite(oldestTime) && oldestTime * 1000 >= startOfSpecifiedDate) {
      const chatHistory = await group.getChatHistory(seq, HISTORY_PAGE_SIZE)
      if (!Array.isArray(chatHistory) || chatHistory.length === 0) {
        break
      }
      const added = collect(chatHistory)
      const nextSeq = chatHistory[0].seq
      if (added === 0 || nextSeq == null || nextSeq === seq) {
        // No progress: the beginning of the available history was reached
        // (e.g. a new group, or a bot that joined recently).
        break
      }
      seq = nextSeq
      if (Config.debug) {
        logger.info(`拉取到${chatHistory.length}条聊天记录,当前已累计获取${chats.length}条聊天记录,继续拉...`)
      }
    }

    return chats
      .filter(chat => isTimestampInDateRange(chat.time, startOfSpecifiedDate, endOfSpecifiedDate))
      .sort((a, b) => a.time - b.time)
  }

  /**
   * Count the keywords used in today's messages of a group and return the most
   * frequent ones.
   *
   * @param {number|string} groupId - group identifier
   * @param {number} [topK=100] - maximum number of keywords to return
   * @returns {Promise<Array<[string, number]>>} `[keyword, count]` pairs,
   *   most frequent first
   * @throws {Error} when the tokenizer module could not be loaded, or when
   *   `groupId` is falsy
   */
  async getTodayKeywordTopK (groupId, topK = 100) {
    if (!nodejieba) {
      throw new Error('未安装nodejieba,娱乐功能-词云统计不可用')
    }
    const chats = await this.getTodayHistory(groupId)
    logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
    nodejieba.load()

    const keywords = chats
      .map(chat => chat.raw_message)
      .filter(text => typeof text === 'string')
      .map(text => NON_TEXT_PLACEHOLDERS.reduce((acc, placeholder) => acc.replaceAll(placeholder, ''), text))
      // Messages that consisted only of placeholders have nothing to tokenize.
      .filter(text => text.trim() !== '')
      .flatMap(text => nodejieba.extract(text, 10))
      .map(item => item.keyword)
    if (Config.debug) {
      logger.info(keywords)
    }

    // A Map rather than a plain object: keywords such as "constructor" or
    // "toString" would otherwise collide with Object.prototype members and
    // produce NaN counts.
    const counts = new Map()
    for (const keyword of keywords) {
      counts.set(keyword, (counts.get(keyword) ?? 0) + 1)
    }

    logger.mark('分词统计完成,绘制词云中...')
    return [...counts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, topK)
  }
}

/**
 * Tell whether a timestamp lies inside the half-open range
 * [startOfSpecifiedDate, endOfSpecifiedDate).
 *
 * The timestamp is multiplied by 1000 before the comparison, i.e. it is
 * expected in seconds while both bounds are in milliseconds.
 *
 * @param {number} timestamp - time to test, in seconds; falsy values
 *   (undefined, null, 0) are never in range
 * @param {number} startOfSpecifiedDate - inclusive lower bound, in milliseconds
 * @param {number} endOfSpecifiedDate - exclusive upper bound, in milliseconds
 * @returns {boolean} true when the timestamp falls inside the range
 */
function isTimestampInDateRange (timestamp, startOfSpecifiedDate, endOfSpecifiedDate) {
  if (!timestamp) {
    return false
  }
  const timestampMs = timestamp * 1000
  return timestampMs >= startOfSpecifiedDate && timestampMs < endOfSpecifiedDate
}