diff --git a/apps/chat.js b/apps/chat.js index 963b9c7..4a1bbb0 100644 --- a/apps/chat.js +++ b/apps/chat.js @@ -7,6 +7,7 @@ import { ChatGPTAPI } from 'chatgpt' import { BingAIClient } from '@waylaidwanderer/chatgpt-api' import SydneyAIClient from '../utils/SydneyAIClient.js' import { PoeClient } from '../utils/poe/index.js' +import AzureTTS from '../utils/tts/microsoft-azure.js' import { render, renderUrl, getMessageById, @@ -141,6 +142,10 @@ export class chatgpt extends plugin { reg: '^#chatgpt语音模式$', fnc: 'switch2Audio' }, + { + reg: '^#chatgpt语音换源', + fnc: 'switchTTSSource' + }, { reg: '^#chatgpt设置(语音角色|角色语音|角色)', fnc: 'setDefaultRole' @@ -519,26 +524,71 @@ export class chatgpt extends plugin { await this.reply('ChatGPT回复已转换为语音模式') } + async switchTTSSource (e) { + let target = e.msg.replace(/^#chatgpt语音换源/, '') + switch (target.trim()) { + case '1': { + Config.ttsMode = 'vits-uma-genshin-honkai' + break + } + case '2': { + Config.ttsMode = 'azure' + break + } + default: { + await e.reply('请使用#chatgpt语音换源+数字进行换源。1为vits-uma-genshin-honkai,2为微软Azure') + return + } + } + await e.reply('语音转换源已切换为' + Config.ttsMode) + } + async setDefaultRole (e) { - if (!Config.ttsSpace) { - await this.reply('您没有配置VITS API,请前往锅巴面板进行配置') + if (Config.ttsMode === 'vits-uma-genshin-honkai' && !Config.ttsSpace) { + await this.reply('您没有配置vits-uma-genshin-honkai API,请前往后台管理或锅巴面板进行配置') return } - let userSetting = await redis.get(`CHATGPT:USER:${e.sender.user_id}`) - if (!userSetting) { - userSetting = getDefaultReplySetting() - } else { - userSetting = JSON.parse(userSetting) + if (Config.ttsMode === 'azure' && !Config.azureTTSKey) { + await this.reply('您没有配置azure 密钥,请前往后台管理或锅巴面板进行配置') + return } const regex = /^#chatgpt设置(语音角色|角色语音|角色)/ - // let speaker = _.trimStart(e.msg, regex) || '随机' let speaker = e.msg.replace(regex, '').trim() || '随机' - userSetting.ttsRole = convertSpeaker(speaker) - if (speakers.indexOf(userSetting.ttsRole) >= 0) { - await redis.set(`CHATGPT:USER:${e.sender.user_id}`, JSON.stringify(userSetting)) - await this.reply(`您的默认语音角色已被设置为”${userSetting.ttsRole}“`) - } else { - await this.reply(`”抱歉,${userSetting.ttsRole}“我还不认识呢`) + switch (Config.ttsMode) { + case 'vits-uma-genshin-honkai': { + let userSetting = await redis.get(`CHATGPT:USER:${e.sender.user_id}`) + if (!userSetting) { + userSetting = getDefaultReplySetting() + } else { + userSetting = JSON.parse(userSetting) + } + userSetting.ttsRole = convertSpeaker(speaker) + if (speakers.indexOf(userSetting.ttsRole) >= 0) { + await redis.set(`CHATGPT:USER:${e.sender.user_id}`, JSON.stringify(userSetting)) + await this.reply(`您的默认语音角色已被设置为”${userSetting.ttsRole}“`) + } else { + await this.reply(`抱歉,"${userSetting.ttsRole}"我还不认识呢`) + } + break + } + case 'azure': { + let chosen = AzureTTS.supportConfigurations.filter(s => s.name === speaker) + if (chosen.length === 0) { + await this.reply(`抱歉,没有"${speaker}"这个角色,目前azure模式下支持的角色有${AzureTTS.supportConfigurations.map(item => item.name).join('、')}`) + } else { + let userSetting = await redis.get(`CHATGPT:USER:${e.sender.user_id}`) + if (!userSetting) { + userSetting = getDefaultReplySetting() + } else { + userSetting = JSON.parse(userSetting) + } + userSetting.ttsRoleAzure = chosen[0].code + await redis.set(`CHATGPT:USER:${e.sender.user_id}`, JSON.stringify(userSetting)) + // Config.azureTTSSpeaker = chosen[0].code + await this.reply(`您的默认语音角色已被设置为”${speaker}-${chosen[0].gender}-${chosen[0].languageDetail}“`) + } + break + } } } @@ -639,7 +689,12 @@ export class chatgpt extends plugin { 
      userSetting = getDefaultReplySetting()
     }
     let useTTS = !!userSetting.useTTS
-    let speaker = convertSpeaker(userSetting.ttsRole || Config.defaultTTSRole)
+    let speaker
+    if (Config.ttsMode === 'vits-uma-genshin-honkai') {
+      speaker = convertSpeaker(userSetting.ttsRole || Config.defaultTTSRole)
+    } else if (Config.ttsMode === 'azure') {
+      speaker = userSetting.ttsRoleAzure || Config.azureTTSSpeaker
+    }
     // 每个回答可以指定
     let trySplit = prompt.split('回答:')
     if (trySplit.length > 1 && speakers.indexOf(convertSpeaker(trySplit[0])) > -1) {
@@ -904,7 +959,7 @@ export class chatgpt extends plugin {
         ttsResponse = response.replace(ttsRegex, '')
         // 先把文字回复发出去,避免过久等待合成语音
         if (Config.alsoSendText || ttsResponse.length > Config.ttsAutoFallbackThreshold) {
-          if (ttsResponse.length > Config.ttsAutoFallbackThreshold) {
+          if (Config.ttsMode === 'vits-uma-genshin-honkai' && ttsResponse.length > Config.ttsAutoFallbackThreshold) {
            await this.reply('回复的内容过长,已转为文本模式')
          }
          await this.reply(await convertFaces(response, Config.enableRobotAt, e), e.isGroup)
@@ -915,28 +970,41 @@ export class chatgpt extends plugin {
             this.reply(`建议的回复:\n${chatMessage.suggestedResponses}`)
           }
         }
-        if (Config.ttsSpace && ttsResponse.length <= Config.ttsAutoFallbackThreshold) {
+        let wav
+        if (Config.ttsMode === 'vits-uma-genshin-honkai' && Config.ttsSpace) {
           try {
-            let wav = await generateAudio(ttsResponse, speaker, '中日混合(中文用[ZH][ZH]包裹起来,日文用[JA][JA]包裹起来)')
-            try {
-              let sendable = await uploadRecord(wav)
-              if (sendable) {
-                await e.reply(sendable)
-              } else {
-                // 如果合成失败,尝试使用ffmpeg合成
-                await e.reply(segment.record(wav))
-              }
-            } catch (err) {
-              logger.error(err)
-              await e.reply(segment.record(wav))
-            }
+            wav = await generateAudio(ttsResponse, speaker, '中日混合(中文用[ZH][ZH]包裹起来,日文用[JA][JA]包裹起来)')
           } catch (err) {
             logger.error(err)
             await this.reply('合成语音发生错误~')
           }
-        } else if (!Config.ttsSpace) {
+        } else if (Config.ttsMode === 'azure' && Config.azureTTSKey) {
+          wav = await AzureTTS.generateAudio(ttsResponse, {
+            speaker: speaker
+          })
+        } else {
           await this.reply('你没有配置转语音API哦')
         }
+        // 仅在成功合成出音频文件后再发送,避免 wav 为空时出错
+        if (wav) {
+          try {
+            try {
+              let sendable = await uploadRecord(wav)
+              if (sendable) {
+                await e.reply(sendable)
+              } else {
+                // 如果合成失败,尝试使用ffmpeg合成
+                await e.reply(segment.record(wav))
+              }
+            } catch (err) {
+              logger.error(err)
+              await e.reply(segment.record(wav))
+            }
+          } catch (err) {
+            logger.error(err)
+            await this.reply('合成语音发生错误~')
+          }
+        }
       } else if (userSetting.usePicture || (Config.autoUsePicture && response.length > Config.autoUsePictureThreshold)) {
         // todo use next api of chatgpt to complete incomplete respoonse
         try {
diff --git a/guoba.support.js b/guoba.support.js
index f62e821..f577e76 100644
--- a/guoba.support.js
+++ b/guoba.support.js
@@ -1,6 +1,6 @@
 import { Config } from './utils/config.js'
 import { speakers } from './utils/tts.js'
-
+import AzureTTS from './utils/tts/microsoft-azure.js'
 // 支持锅巴
 export function supportGuoba () {
   return {
@@ -73,15 +73,47 @@ export function supportGuoba () {
         bottomHelpMessage: '全局默认以语音形式回复,使用默认角色音色',
         component: 'Switch'
       },
+      {
+        field: 'ttsMode',
+        label: '语音模式源',
+        bottomHelpMessage: '语音模式下使用何种语音源进行文本->音频转换',
+        component: 'Select',
+        componentProps: {
+          options: [
+            {
+              label: 'vits-uma-genshin-honkai',
+              value: 'vits-uma-genshin-honkai'
+            },
+            {
+              label: '微软Azure',
+              value: 'azure'
+            }
+          ]
+        }
+      },
       {
         field: 'defaultTTSRole',
-        label: '语音模式默认角色',
-        bottomHelpMessage: '语音模式下,未指定角色时使用的角色。若留空,将使用随机角色回复。若用户通过指令指定了角色,将忽略本设定',
+        label: '语音模式默认角色(vits-uma-genshin-honkai)',
+        bottomHelpMessage: 
'vits-uma-genshin-honkai语音模式下,未指定角色时使用的角色。若留空,将使用随机角色回复。若用户通过指令指定了角色,将忽略本设定', component: 'Select', componentProps: { options: speakers.concat('随机').map(s => { return { label: s, value: s } }) } }, + { + field: 'azureTTSSpeaker', + label: '语音模式默认角色(微软Azure)', + bottomHelpMessage: '微软Azure语音模式下,未指定角色时使用的角色。若用户通过指令指定了角色,将忽略本设定', + component: 'Select', + componentProps: { + options: AzureTTS.supportConfigurations.map(item => { + return { + label: `${item.name}-${item.gender}-${item.languageDetail}`, + value: item.code + } + }) + } + }, { field: 'ttsRegex', label: '语音过滤正则表达式', @@ -509,10 +541,21 @@ export function supportGuoba () { }, { field: 'ttsSpace', - label: '语音转换API地址', + label: 'vits-uma-genshin-honkai语音转换API地址', bottomHelpMessage: '前往duplicate空间https://huggingface.co/spaces/ikechan8370/vits-uma-genshin-honkai后查看api地址', component: 'Input' }, + { + field: 'azureTTSKey', + label: 'Azure语音服务密钥', + component: 'Input' + }, + { + field: 'azureTTSRegion', + label: 'Azure语音服务区域', + bottomHelpMessage: '例如japaneast', + component: 'Input' + }, { field: 'huggingFaceReverseProxy', label: '语音转换huggingface反代', diff --git a/package.json b/package.json index 05be46b..5b21142 100644 --- a/package.json +++ b/package.json @@ -8,6 +8,7 @@ "@fastify/static": "^6.9.0", "@slack/bolt": "^3.13.0", "@waylaidwanderer/chatgpt-api": "^1.33.2", + "asn1.js": "^5.0.0", "chatgpt": "^5.1.1", "delay": "^5.0.0", "diff": "^5.1.0", @@ -17,6 +18,7 @@ "https-proxy-agent": "5.0.1", "keyv": "^4.5.2", "keyv-file": "^0.2.0", + "microsoft-cognitiveservices-speech-sdk": "^1.27.0", "node-fetch": "^3.3.1", "openai": "^3.2.1", "random": "^4.1.0", diff --git a/utils/common.js b/utils/common.js index 600211d..0b8777b 100644 --- a/utils/common.js +++ b/utils/common.js @@ -29,7 +29,7 @@ try { } _puppeteer = new Puppeteer(puppeteerCfg) } catch (e) { - logger.warn('未能加载puppeteer,尝试降级到Yunzai的puppeteer尝试') + logger.debug('未能加载puppeteer,尝试降级到Yunzai的puppeteer尝试') _puppeteer = puppeteer } diff --git a/utils/config.js b/utils/config.js index 2b28432..95ee5e5 100644 --- a/utils/config.js +++ b/utils/config.js @@ -104,6 +104,10 @@ const defaultConfig = { slackClaudeSpecifiedChannel: '', cloudTranscode: 'https://silk.201666.xyz', cloudMode: 'url', + ttsMode: 'vits-uma-genshin-honkai', // or azure + azureTTSKey: '', + azureTTSRegion: '', + azureTTSSpeaker: 'zh-CN-XiaochenNeural', version: 'v2.5.7' } const _path = process.cwd() diff --git a/utils/tts/microsoft-azure.js b/utils/tts/microsoft-azure.js new file mode 100644 index 0000000..aff8fe0 --- /dev/null +++ b/utils/tts/microsoft-azure.js @@ -0,0 +1,246 @@ + +import crypto from 'crypto' +import { mkdirs } from '../common.js' +import { Config } from '../config.js' +let sdk +try { + sdk = (await import('microsoft-cognitiveservices-speech-sdk')).default +} catch (err) { + logger.warn('未安装microsoft-cognitiveservices-speech-sdk,无法使用微软Azure语音源') +} +async function generateAudio (text, option = {}) { + if (!sdk) { + throw new Error('未安装microsoft-cognitiveservices-speech-sdk,无法使用微软Azure语音源') + } + let subscriptionKey = Config.azureTTSKey + let serviceRegion = Config.azureTTSRegion + const _path = process.cwd() + mkdirs(`${_path}/data/chatgpt/tts/azure`) + let filename = `${_path}/data/chatgpt/tts/azure/${crypto.randomUUID()}.wav` + let audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename) + let speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion) + // speechConfig.speechSynthesisLanguage = option?.language || 'zh-CN' + logger.info('using speaker: ' + option?.speaker || 
'zh-CN-YunyeNeural') + speechConfig.speechSynthesisVoiceName = option?.speaker || 'zh-CN-YunyeNeural' + let synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig) + + return new Promise((resolve, reject) => { + synthesizer.speakTextAsync(text, result => { + if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) { + console.log('synthesis finished.') + } else { + console.error('Speech synthesis canceled, ' + result.errorDetails + + '\nDid you update the subscription info?') + } + synthesizer.close() + synthesizer = undefined + resolve(filename) + }, err => { + console.error('err - ' + err) + synthesizer.close() + synthesizer = undefined + reject(err) + }) + }) +} + +const supportConfigurations = [ + { + code: 'zh-CN-liaoning-XiaobeiNeural', + name: '晓北', + language: 'zh-CN', + languageDetail: '中文(东北官话,简体)', + gender: '女' + }, + { + code: 'zh-CN-henan-YundengNeural', + name: '云登', + language: 'zh-CN', + languageDetail: '中文(中原官话河南,简体)', + gender: '男' + }, + { + code: 'zh-CN-shaanxi-XiaoniNeural', + name: '晓妮', + language: 'zh-CN', + languageDetail: '中文(中原官话陕西,简体)', + gender: '女' + }, + { + code: 'zh-CN-henan-YundengNeural', + name: '云翔', + language: 'zh-CN', + languageDetail: '中文(冀鲁官话,简体)', + gender: '男' + }, + { + code: 'zh-CN-XiaoxiaoNeural', + name: '晓晓', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-YunxiNeural', + name: '云希', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '男' + }, + { + code: 'zh-CN-YunyangNeural', + name: '云扬', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '男' + }, + { + code: 'zh-CN-YunyeNeural', + name: '云野', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '男' + }, + { + code: 'zh-CN-XiaoshuangNeural', + name: '晓双', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaoyouNeural', + name: '晓悠', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaoqiuNeural', + name: '晓秋', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaochenNeural', + name: '晓辰', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaoyanNeural', + name: '晓颜', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaomoNeural', + name: '晓墨', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaoxuanNeural', + name: '晓萱', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaohanNeural', + name: '晓涵', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaoruiNeural', + name: '晓睿', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaomengNeural', + name: '晓梦', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaoyiNeural', + name: '晓伊', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-XiaozhenNeural', + name: '晓甄', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '女' + }, + { + code: 'zh-CN-YunfengNeural', + name: '云枫', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '男' + }, + { + code: 'zh-CN-YunhaoNeural', + name: '云皓', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '男' + }, + { + code: 'zh-CN-YunjianNeural', + name: '云健', + language: 'zh-CN', + languageDetail: '中文(普通话,简体)', + gender: '男' + }, + { 
+    code: 'zh-CN-YunxiaNeural',
+    name: '云夏',
+    language: 'zh-CN',
+    languageDetail: '中文(普通话,简体)',
+    gender: '男'
+  },
+  {
+    code: 'zh-CN-YunzeNeural',
+    name: '云泽',
+    language: 'zh-CN',
+    languageDetail: '中文(普通话,简体)',
+    gender: '男'
+  },
+  {
+    code: 'zh-HK-HiuGaaiNeural',
+    name: '曉佳',
+    language: 'zh-HK',
+    languageDetail: '中文(粤语,繁体)',
+    gender: '女'
+  },
+  {
+    code: 'zh-HK-HiuMaanNeural',
+    name: '曉曼',
+    language: 'zh-HK',
+    languageDetail: '中文(粤语,繁体)',
+    gender: '女'
+  },
+  {
+    code: 'zh-HK-WanLungNeural',
+    name: '雲龍',
+    language: 'zh-HK',
+    languageDetail: '中文(粤语,繁体)',
+    gender: '男'
+  }
+]
+
+export default { generateAudio, supportConfigurations }
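
Usage sketch (not part of the patch): a minimal example of how the new utils/tts/microsoft-azure.js module is driven, mirroring the way chat.js above calls it. It assumes the code runs inside Yunzai (the module relies on the global logger) and that Config.azureTTSKey and Config.azureTTSRegion are already configured; the sample text and the chosen speaker code are illustrative only.

import AzureTTS from './utils/tts/microsoft-azure.js'

// any `code` value from AzureTTS.supportConfigurations works here;
// 'zh-CN-XiaochenNeural' is the default azureTTSSpeaker set in utils/config.js
const speaker = 'zh-CN-XiaochenNeural'

// generateAudio() synthesizes the text through the Azure Speech SDK and resolves
// with the path of a .wav file written under data/chatgpt/tts/azure/
const wav = await AzureTTS.generateAudio('你好,这是一段测试语音', { speaker })
logger.info(`azure tts output: ${wav}`)

// chat.js then converts that file into a sendable voice message with uploadRecord(wav),
// falling back to segment.record(wav) if that conversion fails.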