chore: change tokenizer to @node-rs/jieba

This commit is contained in:
ikechan8370 2023-04-20 22:15:55 +08:00
parent cd228128ba
commit db72462b8e
2 changed files with 5 additions and 4 deletions

View file

@ -25,12 +25,12 @@
"ws": "^8.13.0" "ws": "^8.13.0"
}, },
"optionalDependencies": { "optionalDependencies": {
"@node-rs/jieba": "^1.6.2",
"jimp": "^0.22.7", "jimp": "^0.22.7",
"node-silk": "^0.1.0", "node-silk": "^0.1.0",
"puppeteer-extra": "^3.3.6", "puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-recaptcha": "^3.6.8", "puppeteer-extra-plugin-recaptcha": "^3.6.8",
"puppeteer-extra-plugin-stealth": "^2.11.2", "puppeteer-extra-plugin-stealth": "^2.11.2",
"sharp": "^0.31.3", "sharp": "^0.31.3"
"nodejieba": "^2.6.0"
} }
} }

View file

@ -2,7 +2,7 @@ import { Config } from '../config.js'
let nodejieba let nodejieba
try { try {
nodejieba = (await import('nodejieba')).default nodejieba = (await import('@node-rs/jieba')).default
} catch (err) { } catch (err) {
logger.info('未安装nodejieba娱乐功能-词云统计不可用') logger.info('未安装nodejieba娱乐功能-词云统计不可用')
} }
@ -60,6 +60,7 @@ export class Tokenizer {
} }
let chats = await this.getTodayHistory(groupId) let chats = await this.getTodayHistory(groupId)
logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`) logger.mark(`聊天记录拉去完成,获取到今日内${chats.length}条聊天记录,准备分词中`)
nodejieba.load()
let chatContent = chats let chatContent = chats
.map(c => c.raw_message .map(c => c.raw_message
.replaceAll('[图片]', '') .replaceAll('[图片]', '')
@ -69,7 +70,7 @@ export class Tokenizer {
) )
.map(c => nodejieba.extract(c, 10)) .map(c => nodejieba.extract(c, 10))
.reduce((acc, curr) => acc.concat(curr), []) .reduce((acc, curr) => acc.concat(curr), [])
.map(c => c.word) .map(c => c.keyword)
if (Config.debug) { if (Config.debug) {
logger.info(chatContent) logger.info(chatContent)
} }