From 33b748915d000736aaeb0759b263f291b086b300 Mon Sep 17 00:00:00 2001 From: sion123 <450702724@qq.com> Date: Mon, 2 Feb 2026 02:39:40 +0800 Subject: [PATCH] =?UTF-8?q?fix:=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude/settings.local.json | 3 +- frontend/app/web-gold/src/api/voice.js | 1 + .../app/web-gold/src/composables/useUpload.js | 11 +--- .../app/web-gold/src/views/dh/VoiceCopy.vue | 65 ++++++++++++++++--- .../tik/voice/client/SiliconFlowProvider.java | 3 - .../client/dto/SiliconFlowTtsRequest.java | 2 - .../config/SiliconFlowProviderConfig.java | 5 +- .../service/TikUserVoiceServiceImpl.java | 44 +++++++------ .../voice/vo/AppTikUserVoiceCreateReqVO.java | 5 ++ 9 files changed, 96 insertions(+), 43 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 6e5803fdda..48640218bf 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -82,7 +82,8 @@ "Skill(pptx:*)", "Bash(pdftoppm:*)", "Bash(pip install:*)", - "Bash(where:*)" + "Bash(where:*)", + "mcp__web-reader__webReader" ], "deny": [], "ask": [] diff --git a/frontend/app/web-gold/src/api/voice.js b/frontend/app/web-gold/src/api/voice.js index 3f0c2ea26e..a6d6807589 100644 --- a/frontend/app/web-gold/src/api/voice.js +++ b/frontend/app/web-gold/src/api/voice.js @@ -22,6 +22,7 @@ export const VoiceService = { * @param {string} data.language - 语言(可选) * @param {string} data.gender - 音色类型(可选) * @param {string} data.note - 备注(可选) + * @param {string} data.text - 音频文本(用于语音复刻,前端通过音频识别获取) * @param {string} data.providerType - 供应商类型(可选):cosyvoice-阿里云,siliconflow-硅基流动 * @returns {Promise} */ diff --git a/frontend/app/web-gold/src/composables/useUpload.js b/frontend/app/web-gold/src/composables/useUpload.js index e5153b4f1c..0436b34d07 100644 --- a/frontend/app/web-gold/src/composables/useUpload.js +++ b/frontend/app/web-gold/src/composables/useUpload.js @@ -133,28 +133,23 @@ export function useUpload() { fileType: file.type, groupId, coverBase64, - duration: file.type.startsWith('video/') ? null : undefined // 视频时长由后端处理或前端可选传递 + duration: file.type.startsWith('video/') ? null : undefined }) - // 设置成功状态 state.uploading = false state.status = 'success' state.progress = 100 - // 通知成功 const fileId = completeData.data?.infraFileId || completeData.data?.userFileId - onSuccess && onSuccess(fileId) + const fileUrl = presignedData.data.presignedUrl + onSuccess && onSuccess(fileId, fileUrl) return fileId } catch (error) { - // 设置错误状态 state.uploading = false state.status = 'error' state.error = error.message || '上传失败' - - // 通知错误 onError && onError(error) - throw error } } diff --git a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue index 8fd235b3b1..e2de05f5db 100644 --- a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue +++ b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue @@ -111,6 +111,7 @@ import { PlusOutlined, SearchOutlined, UploadOutlined, PlayCircleOutlined } from import { VoiceService } from '@/api/voice' import { MaterialService } from '@/api/material' import { useUpload } from '@/composables/useUpload' +import useVoiceText from '@gold/hooks/web/useVoiceText' import dayjs from 'dayjs' import BasicLayout from '@/layouts/components/BasicLayout.vue' @@ -123,7 +124,9 @@ const DEFAULT_FORM_DATA = { autoTranscribe: true, language: 'zh-CN', gender: 'female', - note: '' + note: '', + text: '', // 音频文本 + fileUrl: '' // 文件URL(用于获取音频文本) } // ========== 响应式数据 ========== @@ -155,6 +158,9 @@ const formData = reactive({ ...DEFAULT_FORM_DATA }) // ========== Upload Hook ========== const { state: uploadState, upload } = useUpload() +// ========== VoiceText Hook ========== +const { getVoiceText } = useVoiceText() + // ========== 计算属性 ========== const isCreateMode = computed(() => formMode.value === 'create') @@ -307,13 +313,16 @@ const handleCustomUpload = async (options) => { try { const fileId = await upload(file, { fileCategory: 'voice', - groupId: null, // 配音模块不使用groupId + groupId: null, coverBase64: null, onStart: () => {}, onProgress: () => {}, - onSuccess: (id) => { + onSuccess: async (id, fileUrl) => { formData.fileId = id + formData.fileUrl = fileUrl // 保存文件URL message.success('文件上传成功') + // 通过fileId获取播放URL用于语音识别 + await fetchAudioTextById(id) onSuccess?.({ code: 0, data: id }, file) }, onError: (error) => { @@ -330,12 +339,51 @@ const handleCustomUpload = async (options) => { } } + + +// 通过fileId获取音频文本 +const fetchAudioTextById = async (fileId) => { + if (!fileId) return + try { + // 获取音频播放URL + const res = await MaterialService.getAudioPlayUrl(fileId) + if (res.code === 0 && res.data) { + const rawFileUrl = res.data + const results = await getVoiceText([{ audio_url: rawFileUrl }]) + if (results && results.length > 0) { + const text = results[0].value + formData.text = text + if (text) { + message.success('音频文本获取成功') + } + } + } + } catch (error) { + console.error('获取音频文本失败:', error) + } +} + +// 获取音频文本 +const fetchAudioText = async (fileUrl) => { + if (!fileUrl) return + try { + // 阿里云语音识别服务无法访问预签名URL,使用原始URL + const rawFileUrl = extractRawUrl(fileUrl) + const results = await getVoiceText([{ audio_url: rawFileUrl }]) + if (results && results.length > 0) { + const text = results[0].value + formData.text = text + if (text) { + message.success('音频文本获取成功') + } + } + } catch (error) { + console.error('获取音频文本失败:', error) + } +} + const handleFileListChange = (info) => { - // 处理文件列表变化,避免直接修改导致 DOM 错误 const { fileList: newFileList } = info - - // 只更新文件列表,不直接修改文件项的状态 - // 让组件自己管理状态 if (newFileList) { fileList.value = newFileList.filter(item => item.status !== 'removed') } @@ -363,7 +411,8 @@ const handleSubmit = async () => { autoTranscribe: formData.autoTranscribe, language: formData.language, gender: formData.gender, - note: formData.note + note: formData.note, + text: formData.text // 传入音频文本 } : { id: formData.id, diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java index 66fb1637ab..e1ae8a32bf 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java @@ -58,7 +58,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider { sfRequest.setText(getOrDefault(request.getTranscriptionText(), config.getPreviewText())); sfRequest.setAudio(AUDIO_MIME_TYPE + base64Audio); - // 调用上传参考音频 API String url = config.getBaseUrl() + config.getVoiceUploadUrl(); String requestBody = JSONUtil.toJsonStr(sfRequest); log.debug("[SiliconFlowProvider][请求体]{}", requestBody); @@ -123,7 +122,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider { .responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat())) .build(); - // 调用文本转语音 API String url = config.getBaseUrl() + config.getTtsUrl(); String requestBody = JSONUtil.toJsonStr(sfRequest); log.debug("[SiliconFlowProvider][请求体]{}", requestBody); @@ -142,7 +140,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider { throw new RuntimeException("硅基流动文本转语音失败: " + errorBody); } - // 硅基流动直接返回二进制音频数据 byte[] audioBytes = response.bodyBytes(); String base64Audio = Base64.getEncoder().encodeToString(audioBytes); diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java index d9add1609e..5322bf8f08 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java @@ -36,8 +36,6 @@ public class SiliconFlowTtsRequest { */ private Float speed; - - /** * 响应格式(mp3, opus, wav, pcm)(API 参数名:response_format) */ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java index cf072f9bf8..370965ad14 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java @@ -32,8 +32,11 @@ public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderC /** * 默认采样率 + *
mp3: 32000, 44100 (默认 44100)
+ *opus: 48000
+ *wav/pcm: 8000, 16000, 24000, 32000, 44100 (默认 44100)
*/ - private Integer sampleRate = 24000; + private Integer sampleRate = 44100; /** * 默认音频格式 diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java index 2b7db6aaad..4215ac9a74 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java @@ -144,34 +144,38 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService { .setLanguage(StrUtil.blankToDefault(createReqVO.getLanguage(), "zh-CN")) .setGender(StrUtil.blankToDefault(createReqVO.getGender(), "female")) .setNote(createReqVO.getNote()) - .setTranscription(null); // 初始为空,表示未识别 + .setTranscription(createReqVO.getText()); // 使用前端传入的文本 voiceMapper.insert(voice); // 4. 调用语音克隆服务,生成 voice_id - try { - log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]", - voice.getId(), fileDO.getId(), createReqVO.getProviderType()); - String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); + if (StrUtil.isNotBlank(createReqVO.getText())) { + try { + log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]", + voice.getId(), fileDO.getId(), createReqVO.getProviderType()); + String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); - VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType()); - String providerType = getProviderType(createReqVO.getProviderType(), provider); - String model = getModelByProvider(providerType); + VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType()); + String providerType = getProviderType(createReqVO.getProviderType(), provider); + String model = getModelByProvider(providerType); - VoiceCloneRequest cloneRequest = new VoiceCloneRequest(); - cloneRequest.setAudioUrl(fileAccessUrl); - cloneRequest.setModel(model); - cloneRequest.setPrefix("voice" + voice.getId()); - cloneRequest.setTranscriptionText(voice.getTranscription()); + VoiceCloneRequest cloneRequest = new VoiceCloneRequest(); + cloneRequest.setAudioUrl(fileAccessUrl); + cloneRequest.setModel(model); + cloneRequest.setPrefix("voice" + voice.getId()); + cloneRequest.setTranscriptionText(createReqVO.getText()); // 使用前端传入的文本 - VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest); - String voiceId = cloneResult.getVoiceId(); + VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest); + String voiceId = cloneResult.getVoiceId(); - voice.setVoiceId(voiceId); - voiceMapper.updateById(voice); + voice.setVoiceId(voiceId); + voiceMapper.updateById(voice); - log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId); - } catch (Exception e) { - log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e); + log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId); + } catch (Exception e) { + log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e); + } + } else { + log.info("[createVoice][未提供文本,跳过语音复刻,配音编号({})]", voice.getId()); } diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java index 4720d54634..476574c7b5 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java @@ -3,6 +3,7 @@ package cn.iocoder.yudao.module.tik.voice.vo; import io.swagger.v3.oas.annotations.media.Schema; import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Size; import lombok.Data; /** @@ -34,6 +35,10 @@ public class AppTikUserVoiceCreateReqVO { @Schema(description = "备注", example = "这是一个测试配音") private String note; + @Schema(description = "音频文本(用于语音复刻,前端通过音频识别获取)") + @Size(max = 4000, message = "音频文本不能超过 4000 个字符") + private String text; + @Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") private String providerType;