feat: 进一步适配铃音+shamrock;词云进化

2025-12-16 21:37:11 +00:00 · 2023-11-07 18:54:58 +08:00 · 2023-11-07 18:54:58 +08:00 · 0bff8c85b0
commit 0bff8c85b0
parent 61d3b48781
7 changed files with 209 additions and 65 deletions
--- a/apps/chat.js
+++ b/apps/chat.js
@ -77,6 +77,7 @@ import {solveCaptchaOneShot} from '../utils/bingCaptcha.js'
 import {ClaudeAIClient} from '../utils/claude.ai/index.js'
 import {getProxy} from '../utils/proxy.js'
 import {QwenApi} from '../utils/alibaba/qwen-api.js'
+import {getChatHistoryGroup} from '../utils/chat.js'

 try {
  await import('@azure/openai')
@ -1660,28 +1661,7 @@ export class chatgpt extends plugin {
                  if (master && !e.group) {
                    opt.masterName = e.bot.getFriendList().get(parseInt(master))?.nickname
                  }
-                  let latestChats = await e.group.getChatHistory(0, 1)
-                  if (latestChats.length > 0) {
-                    let latestChat = latestChats[0]
-                    if (latestChat) {
-                      let seq = latestChat.seq
-                      let chats = []
-                      while (chats.length < Config.groupContextLength) {
-                        let chatHistory = await e.group.getChatHistory(seq, 20)
-                        chats.push(...chatHistory)
-                      }
-                      chats = chats.slice(0, Config.groupContextLength)
-                      let mm = await e.group.getMemberMap()
-                      chats.forEach(chat => {
-                        let sender = mm.get(chat.sender.user_id)
-                        if (sender) {
-                          chat.sender = sender
-                        }
-                      })
-                      // console.log(chats)
-                      opt.chats = chats
-                    }
-                  }
+                  opt.chats = await getChatHistoryGroup(e, Config.groupContextLength)
                } catch (err) {
                  logger.warn('获取群聊聊天记录失败，本次对话不携带聊天记录', err)
                }
@ -2171,21 +2151,7 @@ export class chatgpt extends plugin {
            if (master && !e.group) {
              opt.masterName = e.bot.getFriendList().get(parseInt(master))?.nickname
            }
-            let latestChat = await e.group.getChatHistory(0, 1)
-            let seq = latestChat[0].seq
-            let chats = []
-            while (chats.length < Config.groupContextLength) {
-              let chatHistory = await e.group.getChatHistory(seq, 20)
-              chats.push(...chatHistory.reverse())
-            }
-            chats = chats.slice(0, Config.groupContextLength)
-            // 太多可能会干扰AI对自身qq号和用户qq的判断，感觉gpt3.5也处理不了那么多信息
-            chats = chats > 50 ? 50 : chats
-            let mm = await e.group.getMemberMap()
-            chats.forEach(chat => {
-              let sender = mm.get(chat.sender.user_id)
-              chat.sender = sender
-            })
+            let chats = await getChatHistoryGroup(e, Config.groupContextLength)
            opt.chats = chats
            const namePlaceholder = '[name]'
            const defaultBotName = 'ChatGPT'
--- a/apps/entertainment.js
+++ b/apps/entertainment.js
@ -50,6 +50,10 @@ export class Entertainment extends plugin {
          reg: '^#(|最新)词云(\\d{1,2}h{0,1}|)$',
          fnc: 'wordcloud_latest'
        },
+        {
+          reg: '^#(我的)?(本月|本周|今日)?词云$',
+          fnc: 'wordcloud_new'
+        },
        {
          reg: '^#((寄批踢|gpt|GPT)?翻.*|chatgpt翻译帮助)',
          fnc: 'translate'
@ -218,7 +222,7 @@ ${translateLangLabels}
      const duration = !match[1] ? 12 : parseInt(match[1]) // default 12h

      if (duration > 24) {
-        await e.reply('最多只能统计24小时内的记录哦')
+        await e.reply('最多只能统计24小时内的记录哦，你可以使用#本周词云和#本月词云获取更长时间的统计~')
        return false
      }
      await e.reply('在统计啦，请稍等...')
@ -236,6 +240,56 @@ ${translateLangLabels}
    }
  }

+  /**
+   * Handle the `#词云` / `#今日词云` / `#本周词云` / `#本月词云` commands and their `#我的…` variants.
+   *
+   * The statistic always ends at the current time and starts at the beginning of the
+   * requested period: today (default), this week (counted from Monday) or this month.
+   * When the command contains `我的` only the sender's messages are counted; an @-mention
+   * segment in the message overrides that and selects the mentioned user instead.
+   * A redis key per group/user pair (600 s expiry) prevents overlapping runs.
+   *
+   * @param e message event; outside of a group the user is told to use the command in a group
+   * @returns {Promise<boolean|undefined>} `true` when a previous run is still in progress, otherwise nothing
+   */
+  async wordcloud_new (e) {
+    if (!e.isGroup) {
+      await e.reply('请在群里发送此命令')
+      return
+    }
+    const groupId = e.group_id
+    let userId
+    if (e.msg.includes('我的')) {
+      userId = e.sender.user_id
+    }
+    const at = e.message.find(m => m.type === 'at')
+    if (at) {
+      userId = at.qq
+    }
+    const lockKey = `CHATGPT:WORDCLOUD_NEW:${groupId}_${userId}`
+    if (await redis.get(lockKey)) {
+      await e.reply('别着急，上次统计还没完呢')
+      return true
+    }
+    await e.reply('在统计啦，请稍等...')
+    // Work out where the requested period starts (local time).
+    const now = new Date()
+    const periodStart = new Date(now)
+    if (e.msg.includes('本周')) {
+      // Weeks start on Monday. getDay() returns 0 for Sunday, which belongs to the
+      // week that began six days earlier; setDate() rolls over month boundaries.
+      const day = now.getDay()
+      periodStart.setDate(now.getDate() - day + (day === 0 ? -6 : 1))
+    } else if (e.msg.includes('本月')) {
+      // Day 1 is the start of the month; setDate(0) would select the last day
+      // of the previous month and make the range one day too long.
+      periodStart.setDate(1)
+    }
+    periodStart.setHours(0, 0, 0, 0)
+    // duration is expressed in hours
+    const duration = (now - periodStart) / 1000 / 60 / 60
+    await redis.set(lockKey, '1', { EX: 600 })
+    try {
+      await makeWordcloud(e, groupId, duration, userId)
+    } catch (err) {
+      logger.error(err)
+      // reply with text rather than with the Error object itself
+      await e.reply(err?.message || String(err))
+    } finally {
+      // release the lock even when the error reply above fails
+      await redis.del(lockKey)
+    }
+  }
+
  async combineEmoj (e) {
    let left = e.msg.codePointAt(0).toString(16).toLowerCase()
    let right = e.msg.codePointAt(2).toString(16).toLowerCase()
--- a/utils/SydneyAIClient.js
+++ b/utils/SydneyAIClient.js
@ -476,7 +476,7 @@ export default class SydneyAIClient {
        context += chats
          .map(chat => {
            let sender = chat.sender || chat || {}
-            if (chat.raw_message.startsWith('建议的回复')) {
+            if (chat.raw_message?.startsWith('建议的回复')) {
              // 建议的回复太容易污染设定导致对话太固定跑偏了
              return ''
            }
--- a/utils/chat.js
+++ b/utils/chat.js
@ -0,0 +1,33 @@
+/**
+ * Fetch the most recent messages of the group an event belongs to.
+ *
+ * For the shamrock adapter the whole batch is requested in one call and returned as is.
+ * For other adapters the history is paged backwards, 20 messages per request, starting
+ * from the latest message, until `num` messages are collected or no older messages are
+ * returned. Each message's `sender` is replaced by the matching entry of the group
+ * member map when one exists; failures of that step are only logged.
+ *
+ * NOTE(review): paging assumes each page is ordered oldest -> newest and that its first
+ * element's `seq` can be used as the anchor of the next request — confirm against the adapter.
+ *
+ * @param e message event providing `adapter` and `group`
+ * @param {number} num maximum number of messages to return
+ * @returns {Promise<Array>} up to `num` messages, oldest first; empty when the group has no history
+ */
+export async function getChatHistoryGroup (e, num) {
+  if (e.adapter === 'shamrock') {
+    return await e.group.getChatHistory(0, num)
+  }
+  const latestChats = await e.group.getChatHistory(0, 1)
+  const latestChat = Array.isArray(latestChats) ? latestChats[0] : undefined
+  if (!latestChat) {
+    return []
+  }
+  const pageSize = 20
+  const seenSeqs = new Set()
+  let chats = []
+  let seq = latestChat.seq
+  while (chats.length < num) {
+    const page = await e.group.getChatHistory(seq, pageSize)
+    if (!Array.isArray(page) || page.length === 0) {
+      break
+    }
+    // Consecutive pages may overlap on the anchor message, so drop what we already have.
+    const fresh = page.filter(chat => chat.seq == null || !seenSeqs.has(chat.seq))
+    if (fresh.length === 0) {
+      // nothing new came back: the beginning of the history has been reached
+      break
+    }
+    fresh.forEach(chat => seenSeqs.add(chat.seq))
+    // older messages go in front so the result stays in chronological order
+    chats = [...fresh, ...chats]
+    const oldestSeq = page[0].seq
+    if (oldestSeq == null || oldestSeq === seq) {
+      // the anchor cannot move any further back; stop instead of re-fetching the same page
+      break
+    }
+    seq = oldestSeq
+  }
+  if (chats.length > num) {
+    // keep the newest `num` messages
+    chats = chats.slice(chats.length - num)
+  }
+  try {
+    const mm = await e.group.getMemberMap()
+    chats.forEach(chat => {
+      const sender = mm.get(chat.sender?.user_id)
+      if (sender) {
+        chat.sender = sender
+      }
+    })
+  } catch (err) {
+    logger.warn(err)
+  }
+  return chats
+}
--- a/utils/common.js
+++ b/utils/common.js
@ -870,7 +870,7 @@ export async function generateAudio (e, pendingText, speakingEmotion, emotionDeg
  if (!Config.ttsSpace && !Config.azureTTSKey && !Config.voicevoxSpace) return false
  let wav
  const speaker = getUserSpeaker(await getUserReplySetting(e))
-  let ignoreEncode = getUin(e) === 88888
+  let ignoreEncode = e.adapter === 'shamrock'
  try {
    if (Config.ttsMode === 'vits-uma-genshin-honkai' && Config.ttsSpace) {
      if (Config.autoJapanese) {
--- a/utils/wordcloud/tokenizer.js
+++ b/utils/wordcloud/tokenizer.js
@ -10,11 +10,11 @@ try {
 }

 export class Tokenizer {
-  async getHistory (groupId, date = new Date(), duration = 0) {
+  async getHistory (e, groupId, date = new Date(), duration = 0, userId) {
    if (!groupId) {
      throw new Error('no valid group id')
    }
-    let group = Bot.pickGroup(groupId, true)
+    let group = e.bot.pickGroup(groupId, true)
    let latestChat = await group.getChatHistory(0, 1)
    let seq = latestChat[0].seq
    let chats = latestChat
@ -42,13 +42,14 @@ export class Tokenizer {
    // if duration > 0, go back to the specified number of hours
    if (duration > 0) {
      // duration should be in range [0, 24]
-        duration = Math.min(duration, 24)
+      // duration = Math.min(duration, 24)
      startOfSpecifiedDate = currentTime - (duration * 60 * 60 * 1000)
    }

-    // Step 4: Get the end of the specified date by adding 24 hours (in milliseconds)
-    const endOfSpecifiedDate = startOfSpecifiedDate + (24 * 60 * 60 * 1000)
-    while (isTimestampInDateRange(chats[0]?.time, startOfSpecifiedDate, endOfSpecifiedDate) && isTimestampInDateRange(chats[chats.length - 1]?.time, startOfSpecifiedDate, endOfSpecifiedDate)) {
+    // Step 4: Get the end of the specified date by current time
+    const endOfSpecifiedDate = currentTime
+    while (isTimestampInDateRange(chats[0]?.time, startOfSpecifiedDate, endOfSpecifiedDate) &&
+    isTimestampInDateRange(chats[chats.length - 1]?.time, startOfSpecifiedDate, endOfSpecifiedDate)) {
      let chatHistory = await group.getChatHistory(seq, 20)
      if (chatHistory.length === 1) {
        if (chats[0].seq === chatHistory[0].seq) {
@ -64,17 +65,20 @@ export class Tokenizer {
      }
    }
    chats = chats.filter(chat => isTimestampInDateRange(chat.time, startOfSpecifiedDate, endOfSpecifiedDate))
+    if (userId) {
+      chats = chats.filter(chat => chat.sender.user_id === userId)
+    }
    return chats
  }

-  async getKeywordTopK (groupId, topK = 100, duration = 0) {
+  async getKeywordTopK (e, groupId, topK = 100, duration = 0, userId) {
    if (!nodejieba) {
      throw new Error('未安装node-rs/jieba，娱乐功能-词云统计不可用')
    }
    // duration represents the number of hours to go back, should in range [0, 24]
-    let chats = await this.getHistory(groupId, new Date(), duration)
-    let duration_str = duration > 0 ? `${duration}小时` : '今日'
-    logger.mark(`聊天记录拉取完成，获取到${duration_str}内${chats.length}条聊天记录，准备分词中`)
+    let chats = await this.getHistory(e, groupId, new Date(), duration, userId)
+    let durationStr = duration > 0 ? `${duration}小时` : '今日'
+    logger.mark(`聊天记录拉取完成，获取到${durationStr}内${chats.length}条聊天记录，准备分词中`)

    const _path = process.cwd()
    let stopWordsPath = `${_path}/plugins/chatgpt-plugin/utils/wordcloud/cn_stopwords.txt`
@ -82,10 +86,10 @@ export class Tokenizer {
    const stopWords = String(data)?.split('\n') || []
    let chatContent = chats
      .map(c => c.message
-           //只统计文本内容
+      // 只统计文本内容
        .filter(item => item.type == 'text')
        .map(textItem => `${textItem.text}`)
-           .join("").trim()
+        .join('').trim()
      )
      .map(c => {
        let length = c.length
@ -132,6 +136,85 @@ export class Tokenizer {
  }
 }

+/**
+ * Tokenizer variant whose history retrieval is written for the Shamrock adapter.
+ * Only `getHistory` is overridden; everything else comes from `Tokenizer`.
+ */
+export class ShamrockTokenizer extends Tokenizer {
+  /**
+   * Collect the group's messages between the start of the requested window and `date`.
+   *
+   * Instead of walking backwards by sequence number, every request asks for the
+   * newest `cursor` messages again (always starting at 0) with a growing `cursor`,
+   * and the previous result is replaced wholesale.
+   *
+   * Side effect: `date` is mutated (its time of day is reset to 00:00:00.000).
+   *
+   * @param e message event; `e.bot.pickGroup` is used to obtain the group
+   * @param groupId id of the group to read
+   * @param {Date} date end of the window, defaults to the current time
+   * @param {number} duration hours to go back from `date`; 0 means "since local midnight of `date`"
+   * @param userId when set, only messages whose `sender.user_id` strictly equals it are kept
+   * @returns {Promise<Array>} the messages inside the window
+   * @throws {Error} when `groupId` is falsy
+   */
+  async getHistory (e, groupId, date = new Date(), duration = 0, userId) {
+    logger.mark('当前使用Shamrock适配器')
+    if (!groupId) {
+      throw new Error('no valid group id')
+    }
+    let group = e.bot.pickGroup(groupId, true)
+    // Start with a large page right away
+    // NOTE(review): the meaning of the third argument (`false`) of getChatHistory is adapter-specific — confirm
+    let pageSize = 500
+    let chats = (await group.getChatHistory(0, pageSize, false)) || []
+    // Step 1: Remember the end of the window before `date` is modified below
+    let currentTime = date.getTime()
+
+    // Step 2: Set the hours, minutes, seconds, and milliseconds to 0
+    date.setHours(0, 0, 0, 0)
+
+    // Step 3: Calculate the timestamp representing the start of the specified date
+    // duration represents the number of hours to go back
+    // if duration is 0, keeping the original date (start of today)
+    let startOfSpecifiedDate = date.getTime()
+    // if duration > 0, go back to the specified number of hours
+    if (duration > 0) {
+      // The former clamp of duration to at most 24 hours is disabled here,
+      // so any positive number of hours is accepted.
+      startOfSpecifiedDate = currentTime - (duration * 60 * 60 * 1000)
+    }
+
+    // Step 4: Get the end of the specified date by currentTime
+    const endOfSpecifiedDate = currentTime
+    let cursor = chats.length
+    // -------------------------------------------------------
+    //               |             |            |
+    // -------------------------------------------------------
+    //                             ^            ^
+    // long ago           cursor+pageSize     cursor       current
+    // Keep pulling while the first element of the batch is still inside the window.
+    while (isTimestampInDateRange(chats[0]?.time, startOfSpecifiedDate, endOfSpecifiedDate)) {
+      // Shamrock history is pulled starting from the newest message. Because the history
+      // updates live, also checking the newest end would stop the loop as soon as somebody
+      // sends a message, so the end of the window is not checked here.
+      // Later pulls get very slow, so the page grows to reduce the number of requests.
+      pageSize = Math.floor(Math.max(cursor / 2, pageSize))
+      cursor = cursor + pageSize
+      // One initial attempt plus up to three retries per request
+      let retries = 3
+      let chatHistory
+      while (retries >= 0) {
+        try {
+          chatHistory = await group.getChatHistory(0, cursor, false)
+          break
+        } catch (err) {
+          // only the final failure is logged
+          if (retries === 0) {
+            logger.error(err)
+          }
+          retries--
+        }
+      }
+      if (retries < 0) {
+        // every attempt failed: give up and work with what has been collected so far
+        logger.warn('拉不动了，就这样吧')
+        break
+      }
+      if (chatHistory.length === 1) {
+        break
+      }
+      if (chatHistory.length === chats.length) {
+        // Nothing more: a bigger request returned the same amount, the history is exhausted
+        break
+      }
+      let oldLength = chats.length
+      // the new batch starts at 0 again, so it replaces the previous one instead of being appended
+      chats = chatHistory
+      // chats.sort(compareByTime)
+      if (Config.debug) {
+        logger.info(`拉取到${chats.length - oldLength}条聊天记录，当前已累计获取${chats.length}条聊天记录，继续拉...`)
+      }
+    }
+    // The last batch may reach past the window; keep only what falls inside it
+    chats = chats.filter(chat => isTimestampInDateRange(chat.time, startOfSpecifiedDate, endOfSpecifiedDate))
+    if (userId) {
+      chats = chats.filter(chat => chat.sender.user_id === userId)
+    }
+    return chats
+  }
+}
+
 function isTimestampInDateRange (timestamp, startOfSpecifiedDate, endOfSpecifiedDate) {
  if (!timestamp) {
    return false
--- a/utils/wordcloud/wordcloud.js
+++ b/utils/wordcloud/wordcloud.js
@ -1,11 +1,19 @@
-import { Tokenizer } from './tokenizer.js'
+import { ShamrockTokenizer, Tokenizer } from './tokenizer.js'
 import { render } from '../common.js'

-export async function makeWordcloud (e, groupId, duration = 0) {
-  let tokenizer = new Tokenizer()
-  let topK = await tokenizer.getKeywordTopK(groupId, 100, duration)
+/**
+ * Build a word cloud image for a group and send it as a reply to the event.
+ *
+ * The tokenizer is chosen by `getTokenizer(e)`, the top 100 keywords are requested
+ * from it, serialized to JSON, logged, and handed to the `wordcloud/index` template;
+ * the rendered result (requested with `retType: 'base64'`) is passed to `e.reply`.
+ *
+ * @param e message event
+ * @param groupId id of the group whose history is analysed
+ * @param {number} duration number of hours to go back, forwarded to the tokenizer (default 0)
+ * @param userId optional user id forwarded to the tokenizer to restrict the statistic to one sender
+ */
+export async function makeWordcloud (e, groupId, duration = 0, userId) {
+  let tokenizer = getTokenizer(e)
+  let topK = await tokenizer.getKeywordTopK(e, groupId, 100, duration, userId)
   let list = JSON.stringify(topK)
-  // let list = topK
-  console.log(list)
-  await render(e, 'chatgpt-plugin', 'wordcloud/index', { list })
+  logger.info(list)
+  let img = await render(e, 'chatgpt-plugin', 'wordcloud/index', { list }, { retType: 'base64' })
+  // NOTE(review): the second argument presumably makes the reply quote the triggering message — confirm
+  await e.reply(img, true)
+}
+
+/**
+ * Choose the tokenizer implementation for the adapter that delivered the event:
+ * `ShamrockTokenizer` when `e.adapter` is 'shamrock', the default `Tokenizer` otherwise.
+ */
+function getTokenizer (e) {
+  const TokenizerClass = e.adapter === 'shamrock' ? ShamrockTokenizer : Tokenizer
+  return new TokenizerClass()
 }