feat: 添加Azure语音支持

This commit is contained in:
ikechan8370 2023-04-22 22:54:03 +08:00
parent 20f6cd5390
commit cf992bbc2c
6 changed files with 396 additions and 36 deletions

View file

@ -7,6 +7,7 @@ import { ChatGPTAPI } from 'chatgpt'
import { BingAIClient } from '@waylaidwanderer/chatgpt-api' import { BingAIClient } from '@waylaidwanderer/chatgpt-api'
import SydneyAIClient from '../utils/SydneyAIClient.js' import SydneyAIClient from '../utils/SydneyAIClient.js'
import { PoeClient } from '../utils/poe/index.js' import { PoeClient } from '../utils/poe/index.js'
import AzureTTS from '../utils/tts/microsoft-azure.js'
import { import {
render, renderUrl, render, renderUrl,
getMessageById, getMessageById,
@ -141,6 +142,10 @@ export class chatgpt extends plugin {
reg: '^#chatgpt语音模式$', reg: '^#chatgpt语音模式$',
fnc: 'switch2Audio' fnc: 'switch2Audio'
}, },
{
reg: '^#chatgpt语音换源',
fnc: 'switchTTSSource'
},
{ {
reg: '^#chatgpt设置(语音角色|角色语音|角色)', reg: '^#chatgpt设置(语音角色|角色语音|角色)',
fnc: 'setDefaultRole' fnc: 'setDefaultRole'
@ -519,26 +524,71 @@ export class chatgpt extends plugin {
await this.reply('ChatGPT回复已转换为语音模式') await this.reply('ChatGPT回复已转换为语音模式')
} }
async switchTTSSource (e) {
let target = e.msg.replace(/^#chatgpt语音换源/, '')
switch (target.trim()) {
case '1': {
Config.ttsMode = 'vits-uma-genshin-honkai'
break
}
case '2': {
Config.ttsMode = 'azure'
break
}
default: {
await e.reply('请使用#chatgpt语音换源+数字进行换源。1为vits-uma-genshin-honkai2为微软Azure')
return
}
}
await e.reply('语音转换源已切换为' + Config.ttsMode)
}
async setDefaultRole (e) { async setDefaultRole (e) {
if (!Config.ttsSpace) { if (Config.ttsMode === 'vits-uma-genshin-honkai' && !Config.ttsSpace) {
await this.reply('您没有配置VITS API请前往锅巴面板进行配置') await this.reply('您没有配置vits-uma-genshin-honkai API请前往后台管理或锅巴面板进行配置')
return return
} }
let userSetting = await redis.get(`CHATGPT:USER:${e.sender.user_id}`) if (Config.ttsMode === 'azure' && !Config.azureTTSKey) {
if (!userSetting) { await this.reply('您没有配置azure 密钥,请前往后台管理或锅巴面板进行配置')
userSetting = getDefaultReplySetting() return
} else {
userSetting = JSON.parse(userSetting)
} }
const regex = /^#chatgpt设置(语音角色|角色语音|角色)/ const regex = /^#chatgpt设置(语音角色|角色语音|角色)/
// let speaker = _.trimStart(e.msg, regex) || '随机'
let speaker = e.msg.replace(regex, '').trim() || '随机' let speaker = e.msg.replace(regex, '').trim() || '随机'
userSetting.ttsRole = convertSpeaker(speaker) switch (Config.ttsMode) {
if (speakers.indexOf(userSetting.ttsRole) >= 0) { case 'vits-uma-genshin-honkai': {
await redis.set(`CHATGPT:USER:${e.sender.user_id}`, JSON.stringify(userSetting)) let userSetting = await redis.get(`CHATGPT:USER:${e.sender.user_id}`)
await this.reply(`您的默认语音角色已被设置为”${userSetting.ttsRole}`) if (!userSetting) {
} else { userSetting = getDefaultReplySetting()
await this.reply(`”抱歉,${userSetting.ttsRole}“我还不认识呢`) } else {
userSetting = JSON.parse(userSetting)
}
userSetting.ttsRole = convertSpeaker(speaker)
if (speakers.indexOf(userSetting.ttsRole) >= 0) {
await redis.set(`CHATGPT:USER:${e.sender.user_id}`, JSON.stringify(userSetting))
await this.reply(`您的默认语音角色已被设置为”${userSetting.ttsRole}`)
} else {
await this.reply(`抱歉,"${userSetting.ttsRole}"我还不认识呢`)
}
break
}
case 'azure': {
let chosen = AzureTTS.supportConfigurations.filter(s => s.name === speaker)
if (chosen.length === 0) {
await this.reply(`抱歉,没有"${speaker}"这个角色目前azure模式下支持的角色有${AzureTTS.supportConfigurations.map(item => item.name).join('、')}`)
} else {
let userSetting = await redis.get(`CHATGPT:USER:${e.sender.user_id}`)
if (!userSetting) {
userSetting = getDefaultReplySetting()
} else {
userSetting = JSON.parse(userSetting)
}
userSetting.ttsRoleAzure = chosen[0].code
await redis.set(`CHATGPT:USER:${e.sender.user_id}`, JSON.stringify(userSetting))
// Config.azureTTSSpeaker = chosen[0].code
await this.reply(`您的默认语音角色已被设置为”${speaker}-${chosen[0].gender}-${chosen[0].languageDetail}`)
}
break
}
} }
} }
@ -639,7 +689,12 @@ export class chatgpt extends plugin {
userSetting = getDefaultReplySetting() userSetting = getDefaultReplySetting()
} }
let useTTS = !!userSetting.useTTS let useTTS = !!userSetting.useTTS
let speaker = convertSpeaker(userSetting.ttsRole || Config.defaultTTSRole) let speaker
if (Config.ttsMode === 'vits-uma-genshin-honkai') {
speaker = convertSpeaker(userSetting.ttsRole || Config.defaultTTSRole)
} else if (Config.ttsMode === 'azure') {
speaker = userSetting.ttsRoleAzure || Config.defaultTTSRole
}
// 每个回答可以指定 // 每个回答可以指定
let trySplit = prompt.split('回答:') let trySplit = prompt.split('回答:')
if (trySplit.length > 1 && speakers.indexOf(convertSpeaker(trySplit[0])) > -1) { if (trySplit.length > 1 && speakers.indexOf(convertSpeaker(trySplit[0])) > -1) {
@ -904,7 +959,7 @@ export class chatgpt extends plugin {
ttsResponse = response.replace(ttsRegex, '') ttsResponse = response.replace(ttsRegex, '')
// 先把文字回复发出去,避免过久等待合成语音 // 先把文字回复发出去,避免过久等待合成语音
if (Config.alsoSendText || ttsResponse.length > Config.ttsAutoFallbackThreshold) { if (Config.alsoSendText || ttsResponse.length > Config.ttsAutoFallbackThreshold) {
if (ttsResponse.length > Config.ttsAutoFallbackThreshold) { if (Config.ttsMode === 'vits-uma-genshin-honkai' && ttsResponse.length > Config.ttsAutoFallbackThreshold) {
await this.reply('回复的内容过长,已转为文本模式') await this.reply('回复的内容过长,已转为文本模式')
} }
await this.reply(await convertFaces(response, Config.enableRobotAt, e), e.isGroup) await this.reply(await convertFaces(response, Config.enableRobotAt, e), e.isGroup)
@ -915,28 +970,38 @@ export class chatgpt extends plugin {
this.reply(`建议的回复:\n${chatMessage.suggestedResponses}`) this.reply(`建议的回复:\n${chatMessage.suggestedResponses}`)
} }
} }
if (Config.ttsSpace && ttsResponse.length <= Config.ttsAutoFallbackThreshold) { let wav
if (Config.ttsMode === 'vits-uma-genshin-honkai' && Config.ttsSpace) {
try { try {
let wav = await generateAudio(ttsResponse, speaker, '中日混合(中文用[ZH][ZH]包裹起来,日文用[JA][JA]包裹起来)') wav = await generateAudio(ttsResponse, speaker, '中日混合(中文用[ZH][ZH]包裹起来,日文用[JA][JA]包裹起来)')
try {
let sendable = await uploadRecord(wav)
if (sendable) {
await e.reply(sendable)
} else {
// 如果合成失败尝试使用ffmpeg合成
await e.reply(segment.record(wav))
}
} catch (err) {
logger.error(err)
await e.reply(segment.record(wav))
}
} catch (err) { } catch (err) {
logger.error(err) logger.error(err)
await this.reply('合成语音发生错误~') await this.reply('合成语音发生错误~')
} }
} else if (!Config.ttsSpace) { } else if (Config.ttsMode === 'azure' && Config.azureTTSKey) {
wav = await AzureTTS.generateAudio(ttsResponse, {
speaker: speaker
})
} else {
await this.reply('你没有配置转语音API哦') await this.reply('你没有配置转语音API哦')
} }
try {
try {
let sendable = await uploadRecord(wav)
if (sendable) {
await e.reply(sendable)
} else {
// 如果合成失败尝试使用ffmpeg合成
await e.reply(segment.record(wav))
}
} catch (err) {
logger.error(err)
await e.reply(segment.record(wav))
}
} catch (err) {
logger.error(err)
await this.reply('合成语音发生错误~')
}
} else if (userSetting.usePicture || (Config.autoUsePicture && response.length > Config.autoUsePictureThreshold)) { } else if (userSetting.usePicture || (Config.autoUsePicture && response.length > Config.autoUsePictureThreshold)) {
// todo use next api of chatgpt to complete incomplete respoonse // todo use next api of chatgpt to complete incomplete respoonse
try { try {

View file

@ -1,6 +1,6 @@
import { Config } from './utils/config.js' import { Config } from './utils/config.js'
import { speakers } from './utils/tts.js' import { speakers } from './utils/tts.js'
import AzureTTS from './utils/tts/microsoft-azure.js'
// 支持锅巴 // 支持锅巴
export function supportGuoba () { export function supportGuoba () {
return { return {
@ -73,15 +73,47 @@ export function supportGuoba () {
bottomHelpMessage: '全局默认以语音形式回复,使用默认角色音色', bottomHelpMessage: '全局默认以语音形式回复,使用默认角色音色',
component: 'Switch' component: 'Switch'
}, },
{
field: 'ttsMode',
label: '语音模式源',
bottomHelpMessage: '语音模式下使用何种语音源进行文本->音频转换',
component: 'Select',
componentProps: {
options: [
{
label: 'vits-uma-genshin-honkai',
value: 'vits-uma-genshin-honkai'
},
{
label: '微软Azure',
value: 'azure'
}
]
}
},
{ {
field: 'defaultTTSRole', field: 'defaultTTSRole',
label: '语音模式默认角色', label: '语音模式默认角色vits-uma-genshin-honkai',
bottomHelpMessage: '语音模式下,未指定角色时使用的角色。若留空,将使用随机角色回复。若用户通过指令指定了角色,将忽略本设定', bottomHelpMessage: 'vits-uma-genshin-honkai语音模式下,未指定角色时使用的角色。若留空,将使用随机角色回复。若用户通过指令指定了角色,将忽略本设定',
component: 'Select', component: 'Select',
componentProps: { componentProps: {
options: speakers.concat('随机').map(s => { return { label: s, value: s } }) options: speakers.concat('随机').map(s => { return { label: s, value: s } })
} }
}, },
{
field: 'azureTTSSpeaker',
label: '语音模式默认角色微软Azure',
bottomHelpMessage: '微软Azure语音模式下未指定角色时使用的角色。若用户通过指令指定了角色将忽略本设定',
component: 'Select',
componentProps: {
options: AzureTTS.supportConfigurations.map(item => {
return {
label: `${item.name}-${item.gender}-${item.languageDetail}`,
value: item.code
}
})
}
},
{ {
field: 'ttsRegex', field: 'ttsRegex',
label: '语音过滤正则表达式', label: '语音过滤正则表达式',
@ -509,10 +541,21 @@ export function supportGuoba () {
}, },
{ {
field: 'ttsSpace', field: 'ttsSpace',
label: '语音转换API地址', label: 'vits-uma-genshin-honkai语音转换API地址',
bottomHelpMessage: '前往duplicate空间https://huggingface.co/spaces/ikechan8370/vits-uma-genshin-honkai后查看api地址', bottomHelpMessage: '前往duplicate空间https://huggingface.co/spaces/ikechan8370/vits-uma-genshin-honkai后查看api地址',
component: 'Input' component: 'Input'
}, },
{
field: 'azureTTSKey',
label: 'Azure语音服务密钥',
component: 'Input'
},
{
field: 'azureTTSRegion',
label: 'Azure语音服务区域',
bottomHelpMessage: '例如japaneast',
component: 'Input'
},
{ {
field: 'huggingFaceReverseProxy', field: 'huggingFaceReverseProxy',
label: '语音转换huggingface反代', label: '语音转换huggingface反代',

View file

@ -8,6 +8,7 @@
"@fastify/static": "^6.9.0", "@fastify/static": "^6.9.0",
"@slack/bolt": "^3.13.0", "@slack/bolt": "^3.13.0",
"@waylaidwanderer/chatgpt-api": "^1.33.2", "@waylaidwanderer/chatgpt-api": "^1.33.2",
"asn1.js": "^5.0.0",
"chatgpt": "^5.1.1", "chatgpt": "^5.1.1",
"delay": "^5.0.0", "delay": "^5.0.0",
"diff": "^5.1.0", "diff": "^5.1.0",
@ -17,6 +18,7 @@
"https-proxy-agent": "5.0.1", "https-proxy-agent": "5.0.1",
"keyv": "^4.5.2", "keyv": "^4.5.2",
"keyv-file": "^0.2.0", "keyv-file": "^0.2.0",
"microsoft-cognitiveservices-speech-sdk": "^1.27.0",
"node-fetch": "^3.3.1", "node-fetch": "^3.3.1",
"openai": "^3.2.1", "openai": "^3.2.1",
"random": "^4.1.0", "random": "^4.1.0",

View file

@ -29,7 +29,7 @@ try {
} }
_puppeteer = new Puppeteer(puppeteerCfg) _puppeteer = new Puppeteer(puppeteerCfg)
} catch (e) { } catch (e) {
logger.warn('未能加载puppeteer尝试降级到Yunzai的puppeteer尝试') logger.debug('未能加载puppeteer尝试降级到Yunzai的puppeteer尝试')
_puppeteer = puppeteer _puppeteer = puppeteer
} }

View file

@ -104,6 +104,10 @@ const defaultConfig = {
slackClaudeSpecifiedChannel: '', slackClaudeSpecifiedChannel: '',
cloudTranscode: 'https://silk.201666.xyz', cloudTranscode: 'https://silk.201666.xyz',
cloudMode: 'url', cloudMode: 'url',
ttsMode: 'vits-uma-genshin-honkai', // or azure
azureTTSKey: '',
azureTTSRegion: '',
azureTTSSpeaker: 'zh-CN-XiaochenNeural',
version: 'v2.5.7' version: 'v2.5.7'
} }
const _path = process.cwd() const _path = process.cwd()

View file

@ -0,0 +1,246 @@
import crypto from 'crypto'
import { mkdirs } from '../common.js'
import { Config } from '../config.js'
// Optional dependency: try to load the Azure Speech SDK once at module load.
// If it is not installed, `sdk` stays undefined and generateAudio will throw
// instead — the Azure TTS source is simply unavailable.
let sdk
try {
  // Top-level await: this module must be loaded as an ES module.
  sdk = (await import('microsoft-cognitiveservices-speech-sdk')).default
} catch (err) {
  // NOTE(review): `logger` is presumably a global injected by the host bot
  // framework (Yunzai) — confirm it exists wherever this module is imported.
  logger.warn('未安装microsoft-cognitiveservices-speech-sdk无法使用微软Azure语音源')
}
/**
 * Synthesize `text` to a WAV file using the Azure Cognitive Services
 * Speech SDK and return the path of the generated file.
 *
 * @param {string} text - plain text to synthesize
 * @param {object} [option] - synthesis options
 * @param {string} [option.speaker] - Azure voice short name
 *   (e.g. 'zh-CN-XiaoxiaoNeural'); defaults to 'zh-CN-YunyeNeural'
 * @returns {Promise<string>} absolute path of the written .wav file
 * @throws {Error} when the Azure Speech SDK is not installed
 */
async function generateAudio (text, option = {}) {
  if (!sdk) {
    throw new Error('未安装microsoft-cognitiveservices-speech-sdk无法使用微软Azure语音源')
  }
  const subscriptionKey = Config.azureTTSKey
  const serviceRegion = Config.azureTTSRegion
  const _path = process.cwd()
  mkdirs(`${_path}/data/chatgpt/tts/azure`)
  // Unique file name per request so concurrent syntheses don't collide.
  const filename = `${_path}/data/chatgpt/tts/azure/${crypto.randomUUID()}.wav`
  const audioConfig = sdk.AudioConfig.fromAudioFileOutput(filename)
  const speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion)
  // Compute the voice once: the original logged `'...' + option?.speaker || fallback`,
  // where `+` binds tighter than `||`, so the fallback was dead and the log
  // could print "using speaker: undefined".
  const voice = option?.speaker || 'zh-CN-YunyeNeural'
  logger.info('using speaker: ' + voice)
  speechConfig.speechSynthesisVoiceName = voice
  let synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig)
  return new Promise((resolve, reject) => {
    synthesizer.speakTextAsync(text, result => {
      if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
        logger.info('synthesis finished.')
      } else {
        logger.error('Speech synthesis canceled, ' + result.errorDetails +
          '\nDid you update the subscription info?')
      }
      synthesizer.close()
      synthesizer = undefined
      // NOTE(review): the filename is resolved even when synthesis was
      // canceled, so callers may receive a path to an empty/partial wav.
      // Kept as-is because callers do not wrap this in try/catch — rejecting
      // here would turn a bad voice message into an unhandled rejection.
      resolve(filename)
    }, err => {
      logger.error('err - ' + err)
      synthesizer.close()
      synthesizer = undefined
      reject(err)
    })
  })
}
/**
 * Azure Neural TTS voices selectable by their Chinese display name.
 * `code` is the Azure voice short name passed to the Speech SDK;
 * `name` is what users type in the role-selection command.
 *
 * Fixes vs. the original list:
 * - 云翔 wrongly reused 云登's code 'zh-CN-henan-YundengNeural', so choosing
 *   云翔 by name silently produced the 云登 voice. The correct Jilu-Mandarin
 *   male voice is 'zh-CN-YunxiangNeural' (per Azure's voice support list).
 * - The three Cantonese (zh-HK-*) voices were mislabeled language 'zh-CN'.
 */
const supportConfigurations = [
  {
    code: 'zh-CN-liaoning-XiaobeiNeural',
    name: '晓北',
    language: 'zh-CN',
    languageDetail: '中文(东北官话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-henan-YundengNeural',
    name: '云登',
    language: 'zh-CN',
    languageDetail: '中文(中原官话河南,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-shaanxi-XiaoniNeural',
    name: '晓妮',
    language: 'zh-CN',
    languageDetail: '中文(中原官话陕西,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-YunxiangNeural',
    name: '云翔',
    language: 'zh-CN',
    languageDetail: '中文(冀鲁官话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-XiaoxiaoNeural',
    name: '晓晓',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-YunxiNeural',
    name: '云希',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-YunyangNeural',
    name: '云扬',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-YunyeNeural',
    name: '云野',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-XiaoshuangNeural',
    name: '晓双',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaoyouNeural',
    name: '晓悠',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaoqiuNeural',
    name: '晓秋',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaochenNeural',
    name: '晓辰',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaoyanNeural',
    name: '晓颜',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaomoNeural',
    name: '晓墨',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaoxuanNeural',
    name: '晓萱',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaohanNeural',
    name: '晓涵',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaoruiNeural',
    name: '晓睿',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaomengNeural',
    name: '晓梦',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaoyiNeural',
    name: '晓伊',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-XiaozhenNeural',
    name: '晓甄',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '女'
  },
  {
    code: 'zh-CN-YunfengNeural',
    name: '云枫',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-YunhaoNeural',
    name: '云皓',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-YunjianNeural',
    name: '云健',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-YunxiaNeural',
    name: '云夏',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-CN-YunzeNeural',
    name: '云泽',
    language: 'zh-CN',
    languageDetail: '中文(普通话,简体)',
    gender: '男'
  },
  {
    code: 'zh-HK-HiuGaaiNeural',
    name: '曉佳',
    language: 'zh-HK',
    languageDetail: '中文(粤语,繁体)',
    gender: '女'
  },
  {
    code: 'zh-HK-HiuMaanNeural',
    name: '曉曼',
    language: 'zh-HK',
    languageDetail: '中文(粤语,繁体)',
    gender: '女'
  },
  {
    code: 'zh-HK-WanLungNeural',
    name: '雲龍',
    language: 'zh-HK',
    languageDetail: '中文(粤语,繁体)',
    gender: '男'
  }
]
export default { generateAudio, supportConfigurations }