增加了“最新词云”功能 (#465)

* 增加了"最新词云"功能

* fix bug

* fix bug...

* fix bug......
This commit is contained in:
kae_mihara 2023-06-05 11:38:03 +08:00 committed by GitHub
parent c00b6d85d2
commit 220a3f155d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 7 deletions

View file

@ -42,6 +42,10 @@ export class Entertainment extends plugin {
reg: '^#?(今日词云|群友在聊什么)$',
fnc: 'wordcloud'
},
{
reg: '^#(|最新)词云(\\d{1,2}h{0,1}|)$',
fnc: 'wordcloud_latest'
},
{
reg: '^#((寄批踢|gpt|GPT)?翻.*|chatgpt翻译帮助)',
fnc: 'translate'
@ -188,6 +192,37 @@ ${translateLangLabels}
await e.reply('请在群里发送此命令')
}
}
async wordcloud_latest(e) {
if (e.isGroup) {
let groupId = e.group_id
let lock = await redis.get(`CHATGPT:WORDCLOUD:${groupId}`)
if (lock) {
await e.reply('别着急,上次统计还没完呢')
return true
}
const regExp = /词云(\d{0,2})(|h)/
const match = e.msg.trim().match(regExp)
const duration = !match[1] ? 12 : parseInt(match[1]) // default 12h
if(duration > 24) {
await e.reply('最多只能统计24小时内的记录哦')
return false
}
await e.reply('在统计啦,请稍等...')
await redis.set(`CHATGPT:WORDCLOUD:${groupId}`, '1', {EX: 600})
try {
await makeWordcloud(e, e.group_id, duration)
} catch (err) {
logger.error(err)
await e.reply(err)
}
await redis.del(`CHATGPT:WORDCLOUD:${groupId}`)
} else {
await e.reply('请在群里发送此命令')
}
}
async combineEmoj(e) {
let left = e.msg.codePointAt(0).toString(16).toLowerCase()

View file

@ -10,7 +10,7 @@ try {
}
export class Tokenizer {
async getTodayHistory (groupId, date = new Date()) {
async getHistory (groupId, date = new Date(), duration = 0) {
if (!groupId) {
throw new Error('no valid group id')
}
@ -29,11 +29,22 @@ export class Tokenizer {
}
return 0
}
// Get the current timestamp
let currentTime = date.getTime()
// Step 2: Set the hours, minutes, seconds, and milliseconds to 0
date.setHours(0, 0, 0, 0)
// Step 3: Calculate the timestamp representing the start of the specified date
const startOfSpecifiedDate = date.getTime()
// duration represents the number of hours to go back
// if duration is 0, keeping the original date (start of today)
let startOfSpecifiedDate = date.getTime()
// if duration > 0, go back to the specified number of hours
if (duration > 0) {
// duration should be in range [0, 24]
duration = Math.min(duration, 24)
startOfSpecifiedDate = currentTime - (duration * 60 * 60 * 1000)
}
// Step 4: Get the end of the specified date by adding 24 hours (in milliseconds)
const endOfSpecifiedDate = startOfSpecifiedDate + (24 * 60 * 60 * 1000)
@ -56,12 +67,14 @@ export class Tokenizer {
return chats
}
async getTodayKeywordTopK (groupId, topK = 100) {
async getKeywordTopK (groupId, topK = 100, duration = 0) {
if (!nodejieba) {
throw new Error('未安装node-rs/jieba娱乐功能-词云统计不可用')
}
let chats = await this.getTodayHistory(groupId)
logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
// duration represents the number of hours to go back, should in range [0, 24]
let chats = await this.getHistory(groupId, new Date(), duration)
let duration_str = duration > 0 ? `${duration}小时` : '今日'
logger.mark(`聊天记录拉取完成,获取到${duration_str}${chats.length}条聊天记录,准备分词中`)
const _path = process.cwd()
let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`

View file

@ -1,9 +1,9 @@
import { Tokenizer } from './tokenizer.js'
import { render } from '../common.js'
export async function makeWordcloud (e, groupId) {
export async function makeWordcloud (e, groupId, duration = 0) {
let tokenizer = new Tokenizer()
let topK = await tokenizer.getTodayKeywordTopK(groupId, 100)
let topK = await tokenizer.getKeywordTopK(groupId, 100, duration)
let list = JSON.stringify(topK)
// let list = topK
console.log(list)