feat: 接入 CosyVoice 语音复刻（保存 voice_id），TTS 合成改用 DashScope SDK

2025-11-19 21:57:16 +08:00
parent f052b0af65
commit 75abf48bc1
11 changed files with 818 additions and 164 deletions
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/CosyVoiceClient.java
@@ -3,9 +3,15 @@ package cn.iocoder.yudao.module.tik.voice.client;
 import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.util.StrUtil;
 import cn.iocoder.yudao.framework.common.exception.ServiceException;
+import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
 import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
 import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
 import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
+import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
+import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
+import com.alibaba.dashscope.audio.ttsv2.enrollment.Voice;
+import com.alibaba.dashscope.audio.ttsv2.enrollment.VoiceEnrollmentService;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import lombok.RequiredArgsConstructor;
@@ -17,6 +23,7 @@ import okhttp3.RequestBody;
 import okhttp3.Response;
 import org.springframework.stereotype.Component;

+import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.time.Duration;
 import java.util.Base64;
@@ -53,28 +60,130 @@ public class CosyVoiceClient {
        if (request == null || StrUtil.isBlank(request.getText())) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
        }
+        if (StrUtil.isBlank(request.getVoiceId())) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "必须提供 voiceId");
+        }

+        SpeechSynthesizer synthesizer = null;
        try {
-            String payload = objectMapper.writeValueAsString(buildPayload(request));
-            Request httpRequest = new Request.Builder()
-                    .url(properties.getTtsUrl())
-                    .addHeader("Authorization", "Bearer " + properties.getApiKey())
-                    .addHeader("Content-Type", "application/json")
-                    .post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
+            log.info("[CosyVoice][开始TTS][voiceId={}, textLength={}, model={}]",
+                    request.getVoiceId(),
+                    request.getText().length(),
+                    StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()));
+
+            // 使用 DashScope SDK 构建参数（严格按文档）
+            SpeechSynthesisParam param = SpeechSynthesisParam.builder()
+                    .apiKey(properties.getApiKey())
+                    .model(StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel()))
+                    .voice(request.getVoiceId())
                    .build();

-            try (Response response = getHttpClient().newCall(httpRequest).execute()) {
-                String body = response.body() != null ? response.body().string() : "";
-                if (!response.isSuccessful()) {
-                    log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
-                    throw buildException(body);
-                }
-                return parseTtsResult(body, request);
+            // 初始化合成器（同步调用传 null）
+            synthesizer = new SpeechSynthesizer(param, null);
+
+            // 阻塞调用，获取完整音频
+            ByteBuffer audioData = synthesizer.call(request.getText());
+
+            if (audioData == null) {
+                throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频数据");
            }
+
+            // 转换为字节数组（严格按照文档：直接使用 array()）
+            byte[] audioBytes = audioData.array();
+
+            log.info("[CosyVoice][TTS合成成功][Request ID: {}, audioSize={}, 首包延迟={}ms]",
+                    synthesizer.getLastRequestId(),
+                    audioBytes.length,
+                    synthesizer.getFirstPackageDelay());
+
+            // 构建返回结果
+            CosyVoiceTtsResult result = new CosyVoiceTtsResult();
+            result.setAudio(audioBytes);
+            result.setFormat(request.getAudioFormat() != null ? request.getAudioFormat() : properties.getAudioFormat());
+            result.setSampleRate(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate());
+            result.setRequestId(synthesizer.getLastRequestId());
+            result.setVoiceId(request.getVoiceId());
+
+            return result;
+
        } catch (ServiceException ex) {
            throw ex;
        } catch (Exception ex) {
-            log.error("[CosyVoice][TTS异常]", ex);
+            log.error("[CosyVoice][TTS异常][voiceId={}, text={}]", request.getVoiceId(), request.getText(), ex);
+            throw exception(VOICE_TTS_FAILED);
+        } finally {
+            // 关闭 WebSocket 连接
+            if (synthesizer != null) {
+                try {
+                    synthesizer.getDuplexApi().close(1000, "任务结束");
+                } catch (Exception e) {
+                    log.warn("[CosyVoice][关闭连接失败]", e);
+                }
+            }
+        }
+    }
+
+    /**
+     * Fallback path: synthesizes speech by POSTing the JSON payload to the configured HTTP TTS endpoint.
+     * @param request TTS request that is turned into the JSON payload by buildPayload
+     * @return the result that parseTtsResult derives from the response body
+     * @throws Exception if serialization or the HTTP call fails, or the response status is not successful
+     */
+    private CosyVoiceTtsResult synthesizeViaHttp(CosyVoiceTtsRequest request) throws Exception {
+        byte[] jsonBytes = objectMapper.writeValueAsString(buildPayload(request)).getBytes(StandardCharsets.UTF_8);
+        Request.Builder builder = new Request.Builder().url(properties.getTtsUrl());
+        builder.addHeader("Authorization", "Bearer " + properties.getApiKey());
+        builder.addHeader("Content-Type", "application/json");
+        Request ttsCall = builder.post(RequestBody.create(jsonBytes, JSON)).build();
+        try (Response httpResponse = getHttpClient().newCall(ttsCall).execute()) {
+            String responseText = httpResponse.body() == null ? "" : httpResponse.body().string();
+            if (httpResponse.isSuccessful()) {
+                return parseTtsResult(responseText, request);
+            }
+            log.error("[CosyVoice][TTS失败][status={}, body={}]", httpResponse.code(), responseText);
+            throw buildException(responseText);
+        }
+    }
+
+    /**
+     * Registers a cloned voice with CosyVoice through the DashScope SDK (voice enrollment).
+     * @param request clone parameters; url, targetModel and prefix must all be non-blank
+     * @return the voice_id reported by the SDK together with the request ID of the call
+     * @throws ServiceException if the client is not enabled, a field is blank, or the SDK call fails
+     */
+    public CosyVoiceCloneResult cloneVoice(CosyVoiceCloneRequest request) {
+        if (!properties.isEnabled()) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
+        }
+        if (request == null || StrUtil.isBlank(request.getUrl())) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "复刻音频URL不能为空");
+        }
+        if (StrUtil.isBlank(request.getTargetModel())) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "复刻模型不能为空");
+        }
+        if (StrUtil.isBlank(request.getPrefix())) {
+            throw exception0(VOICE_TTS_FAILED.getCode(), "音色前缀不能为空");
+        }
+        try {
+            // Log the URL without its query string so a presigned signature never reaches the logs.
+            log.info("[CosyVoice][开始语音复刻][targetModel={}, prefix={}, url={}]",
+                    request.getTargetModel(), request.getPrefix(), StrUtil.subBefore(request.getUrl(), "?", false));
+            VoiceEnrollmentService service = new VoiceEnrollmentService(properties.getApiKey());
+            Voice voice = service.createVoice(request.getTargetModel(), request.getPrefix(), request.getUrl());
+            if (voice == null || StrUtil.isBlank(voice.getVoiceId())) {
+                throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 未返回 voice_id");
+            }
+            log.info("[CosyVoice][语音复刻成功][Request ID: {}, Voice ID: {}]",
+                    service.getLastRequestId(), voice.getVoiceId());
+            CosyVoiceCloneResult result = new CosyVoiceCloneResult();
+            result.setVoiceId(voice.getVoiceId());
+            result.setRequestId(service.getLastRequestId());
+            return result;
+        } catch (ServiceException ex) {
+            throw ex;
+        } catch (Exception ex) {
+            log.error("[CosyVoice][语音复刻异常][targetModel={}, prefix={}]",
+                    request.getTargetModel(), request.getPrefix(), ex);
            throw exception(VOICE_TTS_FAILED);
        }
    }
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneRequest.java
@@ -0,0 +1,36 @@
+package cn.iocoder.yudao.module.tik.voice.client.dto;
+
+import lombok.Data;
+
+/**
+ * Request parameters for CosyVoice voice cloning (voice enrollment).
+ */
+@Data
+public class CosyVoiceCloneRequest {
+
+    /**
+     * Target model for the cloned voice (cosyvoice-v1 or cosyvoice-v2).
+     */
+    private String targetModel;
+
+    /**
+     * Custom voice prefix (digits and lowercase letters only, fewer than 10 characters).
+     */
+    private String prefix;
+
+    /**
+     * Publicly accessible URL of the audio file.
+     */
+    private String url;
+
+    /**
+     * Sample rate; the original note gives 24000 as the default, but no default is assigned in this class.
+     */
+    private Integer sampleRate;
+
+    /**
+     * Audio format; the original note gives wav as the default, but no default is assigned in this class.
+     */
+    private String audioFormat;
+
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/CosyVoiceCloneResult.java
@@ -0,0 +1,21 @@
+package cn.iocoder.yudao.module.tik.voice.client.dto;
+
+import lombok.Data;
+
+/**
+ * Result of a CosyVoice voice cloning call.
+ */
+@Data
+public class CosyVoiceCloneResult {
+
+    /**
+     * The generated voice_id.
+     */
+    private String voiceId;
+
+    /**
+     * Request ID.
+     */
+    private String requestId;
+
+}
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/CosyVoiceProperties.java
@@ -50,6 +50,11 @@ public class CosyVoiceProperties {
     */
    private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";

+    /**
+     * Voice cloning (voice enrollment) endpoint URL.
+     */
+    private String voiceEnrollmentUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/voice-enrollment";
+
    /**
     * 连接超时时间
     */
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/dal/dataobject/TikUserVoiceDO.java
@@ -54,6 +54,10 @@ public class TikUserVoiceDO extends TenantBaseDO {
     * 备注信息
     */
    private String note;
+    /**
+     * Cloned voice ID (the voice_id generated by CosyVoice voice cloning).
+     */
+    private String voiceId;

 }

--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -20,6 +20,8 @@ import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
 import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
 import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
 import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
+import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneRequest;
+import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceCloneResult;
 import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
 import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
 import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
@@ -138,20 +140,30 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                .setTranscription(null); // 初始为空，表示未识别
        voiceMapper.insert(voice);

-        // 4. 如果开启自动识别，异步执行识别（添加防重复检查）
-        if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) {
-            // 再次检查是否已经有识别结果（防止并发重复创建）
-            TikUserVoiceDO checkVoice = voiceMapper.selectById(voice.getId());
-            if (StrUtil.isBlank(checkVoice.getTranscription())) {
-                String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
-                log.info("[createVoice][开启自动识别，配音编号({})，文件ID({})，预签名URL({})]",
-                        voice.getId(), fileDO.getId(), fileAccessUrl);
-                asyncTranscribeVoice(voice.getId(), fileAccessUrl);
-            } else {
-                log.info("[createVoice][配音已经有识别结果，跳过自动识别，配音编号({})]", voice.getId());
-            }
+        // 4. 调用阿里云语音复刻服务，生成 voice_id
+        try {
+            log.info("[createVoice][开始语音复刻，配音编号({})，文件ID({})]", voice.getId(), fileDO.getId());
+            String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
+
+            CosyVoiceCloneRequest cloneRequest = new CosyVoiceCloneRequest();
+            cloneRequest.setTargetModel("cosyvoice-v2"); // 使用v2模型，效果更好
+            cloneRequest.setPrefix("voice" + voice.getId()); // 音色前缀，格式要求
+            cloneRequest.setUrl(fileAccessUrl);
+
+            CosyVoiceCloneResult cloneResult = cosyVoiceClient.cloneVoice(cloneRequest);
+            String voiceId = cloneResult.getVoiceId();
+
+            // 更新配音记录，保存 voice_id
+            voice.setVoiceId(voiceId);
+            voiceMapper.updateById(voice);
+
+            log.info("[createVoice][语音复刻成功，配音编号({})，voice_id({})]", voice.getId(), voiceId);
+        } catch (Exception e) {
+            log.error("[createVoice][语音复刻失败，配音编号({})，错误信息: {}]", voice.getId(), e.getMessage(), e);
+            // 复刻失败不影响配音记录创建，只记录日志
        }

+
        log.info("[createVoice][用户({})创建配音成功，配音编号({})]", userId, voice.getId());
        return voice.getId();
    }
@@ -361,17 +373,25 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
            }

-            // 获取文件信息，用于获取文件URL
-            FileDO fileDO = fileMapper.selectById(voice.getFileId());
-            if (fileDO == null) {
-                throw exception(VOICE_FILE_NOT_EXISTS);
-            }
+            // 优先使用复刻的 voice_id，如果不存在则使用文件URL（兼容旧数据）
+            if (StrUtil.isNotBlank(voice.getVoiceId())) {
+                log.info("[synthesizeVoice][使用复刻音色ID合成，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
+                voiceId = voice.getVoiceId();
+                transcriptionText = voice.getTranscription();
+            } else {
+                log.info("[synthesizeVoice][使用文件URL合成，配音编号({})]", voiceConfigId);
+                // 获取文件信息，用于获取文件URL
+                FileDO fileDO = fileMapper.selectById(voice.getFileId());
+                if (fileDO == null) {
+                    throw exception(VOICE_FILE_NOT_EXISTS);
+                }

-            // 使用文件URL和识别文本进行合成
-            fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
-            transcriptionText = voice.getTranscription();
-            if (StrUtil.isBlank(transcriptionText)) {
-                throw exception(VOICE_NOT_EXISTS, "配音识别文本为空，请先进行语音识别");
+                // 使用文件URL和识别文本进行合成
+                fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
+                transcriptionText = voice.getTranscription();
+                if (StrUtil.isBlank(transcriptionText)) {
+                    throw exception(VOICE_NOT_EXISTS, "配音识别文本为空，请先进行语音识别");
+                }
            }
        }
        // 2. 如果没有配置ID，使用voiceId或fileUrl（系统音色或直接URL方式）
@@ -512,21 +532,31 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
                        voiceConfigId, voice.getUserId(), userId);
                throw exception(VOICE_NOT_EXISTS, "配音不属于当前用户");
            }
-            
-            // 获取文件信息，用于获取文件URL
-            FileDO fileDO = fileMapper.selectById(voice.getFileId());
-            if (fileDO == null) {
-                throw exception(VOICE_FILE_NOT_EXISTS);
+
+            // 优先使用复刻的 voice_id，如果不存在则使用文件URL（兼容旧数据）
+            if (StrUtil.isNotBlank(voice.getVoiceId())) {
+                log.info("[previewVoice][使用复刻音色ID试听，配音编号({})，voice_id({})]", voiceConfigId, voice.getVoiceId());
+                voiceId = voice.getVoiceId();
+                transcriptionText = voice.getTranscription();
+                inputText = StrUtil.blankToDefault(reqVO.getInputText(),
+                        StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
+            } else {
+                log.info("[previewVoice][使用文件URL试听，配音编号({})]", voiceConfigId);
+                // 获取文件信息，用于获取文件URL
+                FileDO fileDO = fileMapper.selectById(voice.getFileId());
+                if (fileDO == null) {
+                    throw exception(VOICE_FILE_NOT_EXISTS);
+                }
+
+                // 使用文件URL和识别文本进行合成
+                fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
+                transcriptionText = voice.getTranscription();
+                if (StrUtil.isBlank(transcriptionText)) {
+                    throw exception(VOICE_NOT_EXISTS, "配音识别文本为空，请先进行语音识别");
+                }
+                inputText = StrUtil.blankToDefault(reqVO.getInputText(),
+                        StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
            }
-            
-            // 使用文件URL和识别文本进行合成
-            fileUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
-            transcriptionText = voice.getTranscription();
-            if (StrUtil.isBlank(transcriptionText)) {
-                throw exception(VOICE_NOT_EXISTS, "配音识别文本为空，请先进行语音识别");
-            }
-            inputText = StrUtil.blankToDefault(reqVO.getInputText(), 
-                    StrUtil.blankToDefault(transcriptionText, cosyVoiceProperties.getPreviewText()));
        }
        // 3. 如果没有配置ID，使用系统配音配置（需要前端传voiceId）
        else {
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceRespVO.java
@@ -38,6 +38,9 @@ public class AppTikUserVoiceRespVO {
    @Schema(description = "备注", example = "这是一个测试配音")
    private String note;

+    @Schema(description = "复刻音色ID（CosyVoice 语音复刻生成的 voice_id）")
+    private String voiceId;
+
    @Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
    private LocalDateTime createTime;