Merge branch 'master' into 'main'

Master

See merge request root/sionrui!2
This commit is contained in:
2026-02-01 15:06:08 +00:00
11 changed files with 63 additions and 159 deletions

View File

@@ -50,7 +50,7 @@ const {
setSpeechRate, setSpeechRate,
resetPreviewState resetPreviewState
} = useTTS({ } = useTTS({
provider: TTS_PROVIDERS.QWEN provider: TTS_PROVIDERS.SILICONFLOW
}) })
// 当前选中的音色ID // 当前选中的音色ID

View File

@@ -10,28 +10,15 @@ import { normalizeProviderType, VOICE_PROVIDER_TYPES } from '@/config/voiceConfi
// 兼容旧代码的导出 // 兼容旧代码的导出
const TTS_PROVIDERS = VOICE_PROVIDER_TYPES const TTS_PROVIDERS = VOICE_PROVIDER_TYPES
// 供应商默认配置(使用标准化后的键名) const DEFAULT_CONFIG = {
const DEFAULT_CONFIG = {
cosyvoice: {
apiEndpoint: '/api/tik/voice/tts', apiEndpoint: '/api/tik/voice/tts',
audioFormat: 'mp3', audioFormat: 'mp3',
supportedFormats: ['mp3', 'wav'] supportedFormats: ['mp3', 'wav']
},
azure: {
apiEndpoint: '/api/tik/voice/azure/tts',
audioFormat: 'mp3',
supportedFormats: ['mp3', 'wav', 'ogg']
},
aws: {
apiEndpoint: '/api/tik/voice/aws/tts',
audioFormat: 'mp3',
supportedFormats: ['mp3', 'wav', 'ogg']
}
} }
export function useTTS(options = {}) { export function useTTS(options = {}) {
const { const {
provider = VOICE_PROVIDER_TYPES.COSYVOICE, provider = VOICE_PROVIDER_TYPES.SILICONFLOW,
customConfig = {} customConfig = {}
} = options } = options
@@ -49,9 +36,7 @@ export function useTTS(options = {}) {
// 获取当前供应商配置 // 获取当前供应商配置
const getProviderConfig = () => { const getProviderConfig = () => {
const normalizedProvider = normalizeProviderType(provider) return DEFAULT_CONFIG
const config = DEFAULT_CONFIG[normalizedProvider] || DEFAULT_CONFIG.cosyvoice
return { ...config, ...customConfig }
} }
/** /**

View File

@@ -20,17 +20,14 @@ export const VOICE_PROVIDER_OPTIONS = [
{ label: '硅基流动 SiliconFlow', value: VOICE_PROVIDER_TYPES.SILICONFLOW } { label: '硅基流动 SiliconFlow', value: VOICE_PROVIDER_TYPES.SILICONFLOW }
] ]
// 供应商别名映射(兼容旧名称)
export const PROVIDER_ALIAS_MAP = {
[VOICE_PROVIDER_TYPES.QWEN]: VOICE_PROVIDER_TYPES.COSYVOICE
}
/** /**
* 标准化供应商类型(处理别名映射) * 标准化供应商类型
*/ */
export function normalizeProviderType(providerType) { export function normalizeProviderType(providerType) {
if (!providerType) return DEFAULT_VOICE_PROVIDER if (!providerType) return DEFAULT_VOICE_PROVIDER
return PROVIDER_ALIAS_MAP[providerType] || providerType return VOICE_PROVIDER_TYPES[providerType] || providerType
} }
/** /**
@@ -41,21 +38,13 @@ export function getProviderLabel(providerType) {
return option?.label || providerType return option?.label || providerType
} }
/**
* 检查供应商是否支持
*/
export function isProviderSupported(providerType) {
const normalized = normalizeProviderType(providerType)
return Object.values(VOICE_PROVIDER_TYPES).includes(normalized)
}
// 默认导出配置对象 // 默认导出配置对象
export default { export default {
VOICE_PROVIDER_TYPES, VOICE_PROVIDER_TYPES,
DEFAULT_VOICE_PROVIDER, DEFAULT_VOICE_PROVIDER,
VOICE_PROVIDER_OPTIONS, VOICE_PROVIDER_OPTIONS,
PROVIDER_ALIAS_MAP,
normalizeProviderType, normalizeProviderType,
getProviderLabel, getProviderLabel,
isProviderSupported
} }

View File

@@ -57,6 +57,15 @@ public class CosyVoiceClient {
if (!config.isEnabled()) { if (!config.isEnabled()) {
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key"); throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
} }
// 添加详细的参数检查日志
String text = request != null ? request.getText() : null;
log.error("[CosyVoice][TTS参数检查][request={}, text={}, voiceId={}, model={}]",
request != null ? "存在" : "为null",
text != null ? "'" + text + "' (长度:" + text.length() + ")" : "为null",
request != null ? request.getVoiceId() : null,
request != null ? request.getModel() : null);
if (request == null || StrUtil.isBlank(request.getText())) { if (request == null || StrUtil.isBlank(request.getText())) {
throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空"); throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
} }
@@ -87,6 +96,8 @@ public class CosyVoiceClient {
param.setInstruction(request.getInstruction()); param.setInstruction(request.getInstruction());
} }
log.error("[CosyVoice][SDK参数][param={}, text='{}']", param, request.getText());
// 初始化合成器(同步调用传 null // 初始化合成器(同步调用传 null
synthesizer = new SpeechSynthesizer(param, null); synthesizer = new SpeechSynthesizer(param, null);

View File

@@ -92,6 +92,9 @@ public class CosyVoiceProvider implements VoiceCloneProvider {
.preview(request.isPreview()) .preview(request.isPreview())
.build(); .build();
log.error("[CosyVoiceProvider][构建的cosyRequest][text='{}', voiceId={}, fileUrl={}]",
cosyRequest.getText(), cosyRequest.getVoiceId(), cosyRequest.getFileUrl());
// 调用底层 Client // 调用底层 Client
cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult = cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult cosyResult =
cosyVoiceClient.synthesize(cosyRequest); cosyVoiceClient.synthesize(cosyRequest);

View File

@@ -120,7 +120,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
.input(request.getText()) .input(request.getText())
.voice(request.getVoiceId()) .voice(request.getVoiceId())
.speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f) .speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
.sampleRate(request.getSampleRate() != null ? request.getSampleRate() : config.getSampleRate())
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat())) .responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
.build(); .build();
@@ -150,7 +149,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
VoiceTtsResult result = new VoiceTtsResult(); VoiceTtsResult result = new VoiceTtsResult();
result.setAudio(Base64.getDecoder().decode(base64Audio)); result.setAudio(Base64.getDecoder().decode(base64Audio));
result.setFormat(sfRequest.getResponseFormat()); result.setFormat(sfRequest.getResponseFormat());
result.setSampleRate(sfRequest.getSampleRate());
result.setVoiceId(request.getVoiceId()); result.setVoiceId(request.getVoiceId());
log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]", log.info("[SiliconFlowProvider][语音合成成功][format={}, audioSize={}]",

View File

@@ -1,5 +1,6 @@
package cn.iocoder.yudao.module.tik.voice.client.dto; package cn.iocoder.yudao.module.tik.voice.client.dto;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Builder; import lombok.Builder;
import lombok.Data; import lombok.Data;
@@ -20,8 +21,9 @@ public class SiliconFlowTtsRequest {
private String model; private String model;
/** /**
* 待合成文本 * 待合成文本API 参数名input
*/ */
@JsonProperty("input")
private String input; private String input;
/** /**
@@ -34,14 +36,12 @@ public class SiliconFlowTtsRequest {
*/ */
private Float speed; private Float speed;
/**
* 采样率(如 24000
*/
private Integer sampleRate;
/** /**
* 响应格式mp3, wav, pcm * 响应格式mp3, opus, wav, pcmAPI 参数名response_format
*/ */
@JsonProperty("response_format")
private String responseFormat; private String responseFormat;
} }

View File

@@ -427,7 +427,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
transcriptionText, transcriptionText,
reqVO.getInputText(), reqVO.getInputText(),
false); false);
// 移除appendEmotion调用情感通过instruction参数传递
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX, String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
voiceId, voiceId,
@@ -493,128 +492,75 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
Long userId = SecurityFrameworkUtils.getLoginUserId(); Long userId = SecurityFrameworkUtils.getLoginUserId();
Long voiceConfigId = reqVO.getVoiceConfigId(); Long voiceConfigId = reqVO.getVoiceConfigId();
// 增加请求参数日志 log.info("[previewVoice][试听voiceConfigId={}, voiceId={}, userId={}]",
log.info("[previewVoice][开始试听请求参数voiceConfigId={}, voiceId={}, fileUrl={}, userId={}]", voiceConfigId, reqVO.getVoiceId(), userId);
voiceConfigId, reqVO.getVoiceId(), reqVO.getFileUrl(), userId);
String voiceId = null; String voiceId = null;
String fileUrl = null; String fileUrl = null;
String transcriptionText = null; String referenceText = null;
String inputText;
// 1. 如果传入了fileUrl和transcriptionText直接使用通过语音URL合成 // 1. 通过语音URL合成
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) { if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
log.info("[previewVoice][使用语音URL合成用户({})]", userId);
// 如果传入的是预签名URL提取原始URL去除查询参数避免二次签名
String rawFileUrl = extractRawUrl(reqVO.getFileUrl()); String rawFileUrl = extractRawUrl(reqVO.getFileUrl());
// 如果提取后的URL与原始URL不同说明是预签名URL需要重新生成预签名URL fileUrl = rawFileUrl.equals(reqVO.getFileUrl())
// 否则直接使用可能是原始URL或公开URL ? reqVO.getFileUrl()
if (!rawFileUrl.equals(reqVO.getFileUrl())) { : fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
// 重新生成预签名URL确保有效期足够长 referenceText = reqVO.getTranscriptionText();
fileUrl = fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
log.info("[previewVoice][检测到预签名URL已提取原始URL并重新生成预签名URL]");
} else {
fileUrl = reqVO.getFileUrl();
}
transcriptionText = reqVO.getTranscriptionText();
inputText = StrUtil.blankToDefault(reqVO.getInputText(), transcriptionText);
} }
// 2. 如果有配置ID根据配置ID查询配音信息用户配音 // 2. 用户配音
else if (voiceConfigId != null) { else if (voiceConfigId != null) {
log.info("[previewVoice][开始试听,配音编号({}),用户({})]", voiceConfigId, userId);
TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId); TikUserVoiceDO voice = voiceMapper.selectById(voiceConfigId);
log.info("[previewVoice][查询配音结果voice={},配音编号={},用户ID={}]", if (voice == null || !voice.getUserId().equals(userId)) {
voice != null ? "存在" : "不存在", voiceConfigId, userId); throw exception(VOICE_NOT_EXISTS, "配音不存在");
if (voice == null) {
log.warn("[previewVoice][配音不存在,配音编号({}),用户({})]", voiceConfigId, userId);
throw exception(VOICE_NOT_EXISTS, "配音不存在,编号:" + voiceConfigId);
}
if (!voice.getUserId().equals(userId)) {
log.warn("[previewVoice][配音不属于当前用户,配音编号({}),配音用户({}),当前用户({})]",
voiceConfigId, voice.getUserId(), userId);
throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
} }
// 优先使用复刻的 voice_id如果不存在则使用文件URL兼容旧数据
if (StrUtil.isNotBlank(voice.getVoiceId())) { if (StrUtil.isNotBlank(voice.getVoiceId())) {
log.info("[previewVoice][使用复刻音色ID试听配音编号({})voice_id({})]", voiceConfigId, voice.getVoiceId());
voiceId = voice.getVoiceId(); voiceId = voice.getVoiceId();
// 注意:使用 voiceId 时,不依赖 transcriptionText直接使用前端传入的 inputText
transcriptionText = null; // 清除 transcriptionText
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
} else { } else {
log.info("[previewVoice][使用文件URL试听配音编号({})]", voiceConfigId);
// 获取文件信息用于获取文件URL
FileDO fileDO = fileMapper.selectById(voice.getFileId()); FileDO fileDO = fileMapper.selectById(voice.getFileId());
if (fileDO == null) { if (fileDO == null) {
throw exception(VOICE_FILE_NOT_EXISTS); throw exception(VOICE_FILE_NOT_EXISTS);
} }
// 使用文件URL和识别文本进行合成
fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
transcriptionText = voice.getTranscription(); referenceText = voice.getTranscription();
if (StrUtil.isBlank(transcriptionText)) { if (StrUtil.isBlank(referenceText)) {
throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别"); throw exception(VOICE_NOT_EXISTS, "配音识别文本为空,请先进行语音识别");
} }
inputText = StrUtil.blankToDefault(reqVO.getInputText(),
StrUtil.blankToDefault(transcriptionText, getPreviewText()));
} }
} }
// 3. 如果没有配置ID使用系统配音配置需要前端传voiceId // 3. 系统配音
else { else {
log.info("[previewVoice][开始试听,使用系统配音配置,用户({})]", userId);
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId()); voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), getDefaultVoiceId());
if (StrUtil.isBlank(voiceId)) { if (StrUtil.isBlank(voiceId)) {
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空"); throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
} }
inputText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
} }
String finalText = determineSynthesisText( // 统一处理:使用前端传入的 inputText否则使用默认试听文本
transcriptionText, String finalText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
inputText,
true);
// 使用请求参数或默认值
String instruction = reqVO.getInstruction(); String instruction = reqVO.getInstruction();
// 注意instruction参数现在直接传递给CosyVoice不再添加到文本中
Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f; Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f; Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3"); String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
// 构建缓存key使用fileUrl或voiceId // 缓存
String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX, voiceId, fileUrl, finalText,
voiceId, speechRate, volume, instruction, audioFormat, null);
fileUrl,
finalText,
speechRate,
volume,
instruction,
audioFormat,
null);
PreviewCacheEntry previewCache = getPreviewCache(cacheKey); PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
if (previewCache != null) { if (previewCache != null) {
log.info("[previewVoice][使用缓存,配音编号({})voiceId({})cacheKey({})]", return buildPreviewResp(previewCache.getAudioBase64(), previewCache.getFormat(), voiceId);
voiceConfigId, voiceId, cacheKey);
// 缓存命中直接返回缓存的数据Base64
String cachedBase64 = previewCache.getAudioBase64();
return buildPreviewResp(cachedBase64, previewCache.getFormat(), voiceId);
} }
log.info("[previewVoice][调用语音合成服务,配音编号({})voiceId({})fileUrl({}),文本长度({}),供应商({})]", // TTS 合成
voiceConfigId, voiceId, fileUrl, finalText.length(), reqVO.getProviderType()); log.info("[previewVoice][TTSvoiceId={}, textLen={}]", voiceId, finalText.length());
// 使用 Provider 接口进行 TTS 合成(支持前端选择供应商,不传则使用默认)
VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType()); VoiceCloneProvider provider = voiceProviderFactory.getProvider(reqVO.getProviderType());
VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder() VoiceTtsRequest ttsRequest = VoiceTtsRequest.builder()
.text(finalText) .text(finalText)
.voiceId(voiceId) .voiceId(voiceId)
.fileUrl(fileUrl) .fileUrl(fileUrl)
.referenceText(transcriptionText) .referenceText(referenceText)
.model(null) // 使用默认模型 .model(null)
.speechRate(speechRate) .speechRate(speechRate)
.volume(volume) .volume(volume)
.instruction(instruction) .instruction(instruction)
@@ -624,22 +570,13 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.build(); .build();
VoiceTtsResult ttsResult = provider.synthesize(ttsRequest); VoiceTtsResult ttsResult = provider.synthesize(ttsRequest);
String format = defaultFormat(ttsResult.getFormat(), audioFormat); String format = defaultFormat(ttsResult.getFormat(), audioFormat);
String identifier = StrUtil.isNotBlank(voiceId) ? voiceId : "voice";
String objectName = buildFileName(identifier, format);
// 【安全方案】不暴露OSS链接直接返回Base64编码的音频数据
// 这样前端可直接播放无需额外请求也不会暴露OSS存储信息
String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio()); String audioBase64 = Base64.getEncoder().encodeToString(ttsResult.getAudio());
log.info("[previewVoice][合成成功,配音编号({})voiceId({})format({})audioSize={}]",
voiceConfigId, voiceId, format, ttsResult.getAudio().length);
// 缓存Base64数据用于提升响应速度 savePreviewCache(cacheKey, new PreviewCacheEntry(audioBase64, format,
PreviewCacheEntry entry = new PreviewCacheEntry(audioBase64, format, ttsResult.getSampleRate(), ttsResult.getRequestId()); ttsResult.getSampleRate(), ttsResult.getRequestId()));
savePreviewCache(cacheKey, entry);
// 返回Base64数据前端使用 data:audio/...;base64,... 格式播放 log.info("[previewVoice][成功voiceId={}, format={}, size={}]", voiceId, format, ttsResult.getAudio().length);
return buildPreviewResp(audioBase64, format, voiceId); return buildPreviewResp(audioBase64, format, voiceId);
} }
@@ -716,25 +653,6 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容"); throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
} }
private String appendEmotion(String text, String emotion) {
if (StrUtil.isBlank(text)) {
return text;
}
if (StrUtil.isBlank(emotion) || "neutral".equalsIgnoreCase(emotion)) {
return text;
}
String emotionLabel = switch (emotion.toLowerCase()) {
case "happy" -> "高兴";
case "angry" -> "愤怒";
case "sad" -> "悲伤";
case "scared" -> "害怕";
case "disgusted" -> "厌恶";
case "surprised" -> "惊讶";
default -> emotion;
};
return "【情感:" + emotionLabel + "" + text;
}
/** /**
* 从URL中提取原始URL去除查询参数和锚点 * 从URL中提取原始URL去除查询参数和锚点
* *

View File

@@ -24,7 +24,7 @@ public class AppTikVoicePreviewReqVO {
@Size(max = 4000, message = "语音文本不能超过 4000 个字符") @Size(max = 4000, message = "语音文本不能超过 4000 个字符")
private String transcriptionText; private String transcriptionText;
@Schema(description = "输入文本(可选,如果不传则使用配音的识别文本或默认文本)") @Schema(description = "输入文本(可选,不传则使用默认试听文本)")
@Size(max = 4000, message = "输入文本不能超过 4000 个字符") @Size(max = 4000, message = "输入文本不能超过 4000 个字符")
private String inputText; private String inputText;

View File

@@ -231,7 +231,7 @@ yudao:
default-model: cosyvoice-v3-flash default-model: cosyvoice-v3-flash
siliconflow: siliconflow:
enabled: true enabled: true
api-key: [REDACTED-SILICONFLOW-KEY-1] api-key: [REDACTED-SILICONFLOW-KEY-2] (SECURITY: both plaintext SiliconFlow API keys were committed to version control and are exposed by this record — rotate them immediately and load via an environment variable instead)
base-url: https://api.siliconflow.cn base-url: https://api.siliconflow.cn
default-model: IndexTeam/IndexTTS-2 default-model: IndexTeam/IndexTTS-2
ice: ice:

View File

@@ -214,7 +214,7 @@ spring:
yudao: yudao:
voice: voice:
default-provider: cosyvoice default-provider: siliconflow
cosyvoice: cosyvoice:
enabled: true enabled: true
api-key: [REDACTED-COSYVOICE-KEY] api-key: [REDACTED-COSYVOICE-KEY] (SECURITY: plaintext CosyVoice/DashScope API key committed to version control — rotate it and load via an environment variable instead)
@@ -225,8 +225,8 @@ yudao:
tts-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis tts-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis
voice-enrollment-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment voice-enrollment-url: https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment
siliconflow: siliconflow:
enabled: false enabled: true
api-key: ${SILICONFLOW_API_KEY:} api-key: [REDACTED-SILICONFLOW-KEY-2] (SECURITY regression: this change replaced the environment-variable indirection `${SILICONFLOW_API_KEY:}` with a hardcoded plaintext key — revert to the env-var form and rotate the exposed key)
base-url: https://api.siliconflow.cn base-url: https://api.siliconflow.cn
default-model: IndexTeam/IndexTTS-2 default-model: IndexTeam/IndexTTS-2
sample-rate: 24000 sample-rate: 24000