feat: 乱七八糟的统计词云

This commit is contained in:
ikechan8370 2023-04-20 17:07:33 +08:00
parent 7f773dcfc4
commit 3cfbe9eae7
7 changed files with 188 additions and 4 deletions

View file

@ -7,6 +7,7 @@ import { emojiRegex, googleRequestUrl } from '../utils/emoj/index.js'
import fetch from 'node-fetch'
import { mkdirs } from '../utils/common.js'
import uploadRecord from '../utils/uploadRecord.js'
import { makeWordcloud } from '../utils/wordcloud/wordcloud.js'
let useSilk = false
try {
@ -36,6 +37,10 @@ export class Entertainment extends plugin {
{
reg: `^(${emojiRegex()}){2}$`,
fnc: 'combineEmoj'
},
{
reg: '^#?(今日词云|群友在聊什么)$',
fnc: 'wordcloud'
}
]
})
@ -50,6 +55,22 @@ export class Entertainment extends plugin {
]
}
async wordcloud (e) {
if (e.isGroup) {
let lock = await redis.get('CHATGPT:WORDCLOUD:ALL')
if (lock) {
await e.reply('别着急,上次统计还没完呢')
return
}
await e.reply('在统计啦,请稍等...')
await redis.set('CHATGPT:WORDCLOUD:ALL', '1', { EX: 600 })
await makeWordcloud(e, e.group_id)
await redis.del('CHATGPT:WORDCLOUD:ALL')
} else {
await e.reply('请在群里发送此命令')
}
}
async combineEmoj (e) {
let left = e.msg.codePointAt(0).toString(16).toLowerCase()
let right = e.msg.codePointAt(2).toString(16).toLowerCase()

View file

@ -91,7 +91,7 @@ export class history extends plugin {
await e.reply('无聊天记录', e.isGroup)
return true
}
await e.reply(await render(e, 'chatgpt-plugin', 'content/History/index', {
await render(e, 'chatgpt-plugin', 'content/History/index', {
version: Config.version,
user: {
qq: queryUser,
@ -102,7 +102,7 @@ export class history extends plugin {
name: Bot.nickname
},
chat
}, {}))
}, {})
}
}

View file

@ -18,6 +18,7 @@
"keyv": "^4.5.2",
"keyv-file": "^0.2.0",
"node-fetch": "^3.3.1",
"nodejieba": "^2.6.0",
"openai": "^3.2.1",
"random": "^4.1.0",
"undici": "^5.21.0",

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en">
<head>
<!--
  Word cloud page: draws the supplied keyword list into the 640x350 container
  below using the bundled Js2WordCloud script, followed by a credit line.
  NOTE(review): presumably rendered by a template engine and then screenshotted; confirm against the render helper.
-->
<meta charset="UTF-8">
<title>WordCloud</title>
<script src="{{pluResPath}}/wordcloud/js2wordcloud.min.js"></script>
</head>
<body>
<!-- Container the word cloud is drawn into; its size is fixed inline. -->
<div id="wordcloud2" style="width: 640px;height: 350px"></div>
<script>
// The list placeholder is substituted into a single-quoted string and then parsed as JSON.
// NOTE(review): this assumes the substituted text contains no single quote, backslash
// sequence or closing script tag that would break the literal; confirm the caller escapes it.
let list = JSON.parse('{{@ list}}')
// Create the word cloud on the container element and configure it.
var wc = new Js2WordCloud(document.getElementById('wordcloud2'))
wc.setOption({
tooltip: {
show: true
},
// presumably an array of [word, weight] pairs; verify against the caller
list: list,
color: 'random-light',
fontFamily: 'Microsoft YaHei'
})
</script>
<!-- Credit line; the version placeholder is filled in at render time. -->
<div class="logo">Created By Yunzai-Bot and ChatGPT-Plugin {{version}}</div>
</body>
</html>
<!-- NOTE(review): this style element sits after the closing html tag; it probably belongs inside head. -->
<style>
/* Reset spacing and disable text selection for every element. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
user-select: none;
}
/* Page is laid out at 640px wide and scaled 1.5x from the top-left corner. */
body {
font-family: sans-serif;
font-size: 16px;
width: 640px;
color: #1e1f20;
transform: scale(1.5);
transform-origin: 0 0;
}
/* Centered credit line under the word cloud. */
.logo {
font-size: 14px;
font-family: "tttgbnumber";
text-align: center;
color: #7994a7;
}
</style>

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,95 @@
import nodejieba from 'nodejieba'
import { Config } from '../config.js'
/**
 * Collects one day of a group's chat history and extracts its most frequent keywords.
 */
export class Tokenizer {
  /**
   * Fetch the group's messages that fall on the given day by paging backwards
   * from the newest message, 20 messages at a time.
   *
   * @param {number|string} groupId - id of the group to read
   * @param {Date} [date] - the day to collect; defaults to today
   * @returns {Promise<object[]>} messages of that day, oldest first, each message at most once
   * @throws {Error} when `groupId` is falsy
   */
  async getTodayHistory (groupId, date = new Date()) {
    if (!groupId) {
      throw new Error('no valid group id')
    }
    const group = Bot.pickGroup(groupId, true)
    const latestChat = await group.getChatHistory(0, 1)
    // A group without any retrievable history has nothing to count.
    if (!Array.isArray(latestChat) || latestChat.length === 0) {
      return []
    }
    // Index by seq so a message returned by several pages is kept only once.
    // NOTE(review): getChatHistory(seq, n) appears to return the page ending at
    // `seq` inclusive, so consecutive pages overlap by one message - confirm.
    const bySeq = new Map()
    for (const chat of latestChat) {
      bySeq.set(chat.seq, chat)
    }
    // Oldest first; seq breaks ties between messages sent within the same second.
    const oldestFirst = (a, b) => (a.time - b.time) || (a.seq - b.seq)
    let chats = [...bySeq.values()].sort(oldestFirst)
    while (isTimestampInDateRange(chats[0]?.time, date)) {
      const oldestSeq = chats[0].seq
      const page = await group.getChatHistory(oldestSeq, 20)
      for (const chat of page ?? []) {
        if (!bySeq.has(chat.seq)) {
          bySeq.set(chat.seq, chat)
        }
      }
      chats = [...bySeq.values()].sort(oldestFirst)
      // No older message was obtained: the history is exhausted. Without this
      // check the loop would request the same page forever.
      if (chats[0].seq === oldestSeq) {
        break
      }
    }
    return chats.filter(chat => isTimestampInDateRange(chat.time, date))
  }

  /**
   * Extract up to 10 keywords from each of today's messages and return the
   * `topK` most frequent ones.
   *
   * @param {number|string} groupId - id of the group to analyse
   * @param {number} [topK=100] - maximum number of keywords to return
   * @returns {Promise<Array<[string, number]>>} `[word, count]` pairs, most frequent first
   */
  async getTodayKeywordTopK (groupId, topK = 100) {
    const chats = await this.getTodayHistory(groupId)
    const words = chats
      // Strip the textual placeholders that stand in for non-text content.
      .map(c => (c.raw_message ?? '')
        .replaceAll('[图片]', '')
        .replaceAll('[表情]', '')
        .replaceAll('[动画表情]', '')
        .replaceAll('[语音]', '')
      )
      .flatMap(text => nodejieba.extract(text, 10))
      .map(keyword => keyword.word)
    if (Config.debug) {
      logger.info(words)
    }
    // A Map (not a plain object) so that words such as "constructor" or
    // "toString" are not confused with inherited object properties.
    const counts = new Map()
    for (const word of words) {
      counts.set(word, (counts.get(word) ?? 0) + 1)
    }
    return [...counts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, topK)
  }
}
/**
 * Tell whether a timestamp falls on the same local calendar day as `date`.
 *
 * @param {number} timestamp - time in seconds; falsy values are never in range
 * @param {Date|null} [date] - the day to test against; defaults to today. It is not modified.
 * @returns {boolean} true when local midnight of that day <= timestamp < the next local midnight
 */
function isTimestampInDateRange (timestamp, date = null) {
  if (!timestamp) {
    return false
  }
  const millis = timestamp * 1000
  // Work on a copy: setHours mutates, and the caller's Date must stay untouched.
  const dayStart = date ? new Date(date) : new Date()
  dayStart.setHours(0, 0, 0, 0)
  // Advance by one calendar day rather than a fixed 24 hours, so days that are
  // 23 or 25 hours long (DST changes) still end at the next local midnight.
  const dayEnd = new Date(dayStart)
  dayEnd.setDate(dayEnd.getDate() + 1)
  return millis >= dayStart.getTime() && millis < dayEnd.getTime()
}

View file

@ -0,0 +1,11 @@
import { Tokenizer } from './tokenizer.js'
import { render } from '../common.js'
/**
 * Escape text so it can be placed between single quotes in a JavaScript string
 * literal inside an inline <script> element and evaluate back to the same text.
 *
 * @param {string} text - the text to embed
 * @returns {string} the escaped text
 */
function escapeForSingleQuotedScriptString (text) {
  return text
    // Backslashes first, so the escapes added below are not doubled again.
    .replace(/\\/g, '\\\\')
    .replace(/'/g, "\\'")
    // Keeps a "</script>" or "<!--" inside a word from ending the script element.
    .replace(/</g, '\\u003c')
    .replace(/\u2028/g, '\\u2028')
    .replace(/\u2029/g, '\\u2029')
}

/**
 * Build today's keyword list for a group and render it with the
 * 'wordcloud/index' template.
 *
 * The template places the list inside a single-quoted string and parses it as
 * JSON, so the serialized list is escaped for that context; the words come
 * from chat messages and cannot be trusted to be free of quotes or markup.
 *
 * @param {object} e - message event forwarded to `render`
 * @param {number|string} groupId - id of the group whose messages are analysed
 * @returns {Promise<void>}
 */
export async function makeWordcloud (e, groupId) {
  const tokenizer = new Tokenizer()
  const topK = await tokenizer.getTodayKeywordTopK(groupId, 100)
  const list = escapeForSingleQuotedScriptString(JSON.stringify(topK))
  await render(e, 'chatgpt-plugin', 'wordcloud/index', { list })
}