增加了“最新词云”功能 (#465)

* 增加了"最新词云"功能 * fix bug * fix bug... * fix bug......
2025-12-16 13:27:08 +00:00 · 2023-06-05 11:38:03 +08:00 · 2023-06-05 11:38:03 +08:00 · 220a3f155d
commit 220a3f155d
parent c00b6d85d2
3 changed files with 55 additions and 7 deletions
--- a/apps/entertainment.js
+++ b/apps/entertainment.js
@ -42,6 +42,10 @@ export class Entertainment extends plugin {
          reg: '^#?(今日词云|群友在聊什么)$',
          fnc: 'wordcloud'
        },
+        {
+          reg: '^#(|最新)词云(\\d{1,2}h{0,1}|)$',
+          fnc: 'wordcloud_latest'
+        },
        {
          reg: '^#((寄批踢|gpt|GPT)?翻.*|chatgpt翻译帮助)',
          fnc: 'translate'
@ -188,6 +192,37 @@ ${translateLangLabels}
      await e.reply('请在群里发送此命令')
    }
  }
+  async wordcloud_latest(e) {
+    if (e.isGroup) {
+      let groupId = e.group_id
+      let lock = await redis.get(`CHATGPT:WORDCLOUD:${groupId}`)
+      if (lock) {
+        await e.reply('别着急，上次统计还没完呢')
+        return true
+      }
+
+      const regExp = /词云(\d{0,2})(|h)/
+      const match = e.msg.trim().match(regExp)
+      const duration = !match[1] ? 12 : parseInt(match[1])  // default 12h
+      
+      if(duration > 24) {
+        await e.reply('最多只能统计24小时内的记录哦')
+        return false
+      }
+      await e.reply('在统计啦，请稍等...')
+      
+      await redis.set(`CHATGPT:WORDCLOUD:${groupId}`, '1', {EX: 600})
+      try {
+        await makeWordcloud(e, e.group_id, duration)
+      } catch (err) {
+        logger.error(err)
+        await e.reply(err)
+      }
+      await redis.del(`CHATGPT:WORDCLOUD:${groupId}`)
+    } else {
+      await e.reply('请在群里发送此命令')
+    }
+  }

  async combineEmoj(e) {
    let left = e.msg.codePointAt(0).toString(16).toLowerCase()
--- a/utils/wordcloud/tokenizer.js
+++ b/utils/wordcloud/tokenizer.js
@ -10,7 +10,7 @@ try {
 }

 export class Tokenizer {
-  async getTodayHistory (groupId, date = new Date()) {
+  async getHistory (groupId, date = new Date(), duration = 0) {
    if (!groupId) {
      throw new Error('no valid group id')
    }
@ -29,11 +29,22 @@ export class Tokenizer {
      }
      return 0
    }
+    // Get the current timestamp
+    let currentTime = date.getTime()
+
    // Step 2: Set the hours, minutes, seconds, and milliseconds to 0
    date.setHours(0, 0, 0, 0)

    // Step 3: Calculate the timestamp representing the start of the specified date
-    const startOfSpecifiedDate = date.getTime()
+    // duration represents the number of hours to go back
+    // if duration is 0, keeping the original date (start of today)
+    let startOfSpecifiedDate = date.getTime()
+    // if duration > 0, go back to the specified number of hours
+    if (duration > 0) {
+        // duration should be in range [0, 24]
+        duration = Math.min(duration, 24)
+        startOfSpecifiedDate = currentTime - (duration * 60 * 60 * 1000)
+    }

    // Step 4: Get the end of the specified date by adding 24 hours (in milliseconds)
    const endOfSpecifiedDate = startOfSpecifiedDate + (24 * 60 * 60 * 1000)
@ -56,12 +67,14 @@ export class Tokenizer {
    return chats
  }

-  async getTodayKeywordTopK (groupId, topK = 100) {
+  async getKeywordTopK (groupId, topK = 100, duration = 0) {
    if (!nodejieba) {
      throw new Error('未安装node-rs/jieba，娱乐功能-词云统计不可用')
    }
-    let chats = await this.getTodayHistory(groupId)
-    logger.mark(`聊天记录拉去完成，获取到今日内${chats.length}条聊天记录，准备分词中`)
+    // duration represents the number of hours to go back; it should be in the range [0, 24]
+    let chats = await this.getHistory(groupId, new Date(), duration)
+    let duration_str = duration > 0 ? `${duration}小时` : '今日'
+    logger.mark(`聊天记录拉取完成，获取到${duration_str}内${chats.length}条聊天记录，准备分词中`)
   
    const _path = process.cwd()
    let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
--- a/utils/wordcloud/wordcloud.js
+++ b/utils/wordcloud/wordcloud.js
@ -1,9 +1,9 @@
 import { Tokenizer } from './tokenizer.js'
 import { render } from '../common.js'

-export async function makeWordcloud (e, groupId) {
+export async function makeWordcloud (e, groupId, duration = 0) {
  let tokenizer = new Tokenizer()
-  let topK = await tokenizer.getTodayKeywordTopK(groupId, 100)
+  let topK = await tokenizer.getKeywordTopK(groupId, 100, duration)
  let list = JSON.stringify(topK)
  // let list = topK
  console.log(list)