From 33b748915d000736aaeb0759b263f291b086b300 Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Mon, 2 Feb 2026 02:39:40 +0800
Subject: [PATCH 1/8] =?UTF-8?q?fix:=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.claude/settings.local.json | 3 +-
frontend/app/web-gold/src/api/voice.js | 1 +
.../app/web-gold/src/composables/useUpload.js | 11 +---
.../app/web-gold/src/views/dh/VoiceCopy.vue | 65 ++++++++++++++++---
.../tik/voice/client/SiliconFlowProvider.java | 3 -
.../client/dto/SiliconFlowTtsRequest.java | 2 -
.../config/SiliconFlowProviderConfig.java | 5 +-
.../service/TikUserVoiceServiceImpl.java | 44 +++++++------
.../voice/vo/AppTikUserVoiceCreateReqVO.java | 5 ++
9 files changed, 96 insertions(+), 43 deletions(-)
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index 6e5803fdda..48640218bf 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -82,7 +82,8 @@
"Skill(pptx:*)",
"Bash(pdftoppm:*)",
"Bash(pip install:*)",
- "Bash(where:*)"
+ "Bash(where:*)",
+ "mcp__web-reader__webReader"
],
"deny": [],
"ask": []
diff --git a/frontend/app/web-gold/src/api/voice.js b/frontend/app/web-gold/src/api/voice.js
index 3f0c2ea26e..a6d6807589 100644
--- a/frontend/app/web-gold/src/api/voice.js
+++ b/frontend/app/web-gold/src/api/voice.js
@@ -22,6 +22,7 @@ export const VoiceService = {
* @param {string} data.language - 语言(可选)
* @param {string} data.gender - 音色类型(可选)
* @param {string} data.note - 备注(可选)
+ * @param {string} data.text - 音频文本(用于语音复刻,前端通过音频识别获取)
* @param {string} data.providerType - 供应商类型(可选):cosyvoice-阿里云,siliconflow-硅基流动
* @returns {Promise}
*/
diff --git a/frontend/app/web-gold/src/composables/useUpload.js b/frontend/app/web-gold/src/composables/useUpload.js
index e5153b4f1c..0436b34d07 100644
--- a/frontend/app/web-gold/src/composables/useUpload.js
+++ b/frontend/app/web-gold/src/composables/useUpload.js
@@ -133,28 +133,23 @@ export function useUpload() {
fileType: file.type,
groupId,
coverBase64,
- duration: file.type.startsWith('video/') ? null : undefined // 视频时长由后端处理或前端可选传递
+ duration: file.type.startsWith('video/') ? null : undefined
})
- // 设置成功状态
state.uploading = false
state.status = 'success'
state.progress = 100
- // 通知成功
const fileId = completeData.data?.infraFileId || completeData.data?.userFileId
- onSuccess && onSuccess(fileId)
+ const fileUrl = presignedData.data.presignedUrl
+ onSuccess && onSuccess(fileId, fileUrl)
return fileId
} catch (error) {
- // 设置错误状态
state.uploading = false
state.status = 'error'
state.error = error.message || '上传失败'
-
- // 通知错误
onError && onError(error)
-
throw error
}
}
diff --git a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue
index 8fd235b3b1..e2de05f5db 100644
--- a/frontend/app/web-gold/src/views/dh/VoiceCopy.vue
+++ b/frontend/app/web-gold/src/views/dh/VoiceCopy.vue
@@ -111,6 +111,7 @@ import { PlusOutlined, SearchOutlined, UploadOutlined, PlayCircleOutlined } from
import { VoiceService } from '@/api/voice'
import { MaterialService } from '@/api/material'
import { useUpload } from '@/composables/useUpload'
+import useVoiceText from '@gold/hooks/web/useVoiceText'
import dayjs from 'dayjs'
import BasicLayout from '@/layouts/components/BasicLayout.vue'
@@ -123,7 +124,9 @@ const DEFAULT_FORM_DATA = {
autoTranscribe: true,
language: 'zh-CN',
gender: 'female',
- note: ''
+ note: '',
+ text: '', // 音频文本
+ fileUrl: '' // 文件URL(用于获取音频文本)
}
// ========== 响应式数据 ==========
@@ -155,6 +158,9 @@ const formData = reactive({ ...DEFAULT_FORM_DATA })
// ========== Upload Hook ==========
const { state: uploadState, upload } = useUpload()
+// ========== VoiceText Hook ==========
+const { getVoiceText } = useVoiceText()
+
// ========== 计算属性 ==========
const isCreateMode = computed(() => formMode.value === 'create')
@@ -307,13 +313,16 @@ const handleCustomUpload = async (options) => {
try {
const fileId = await upload(file, {
fileCategory: 'voice',
- groupId: null, // 配音模块不使用groupId
+ groupId: null,
coverBase64: null,
onStart: () => {},
onProgress: () => {},
- onSuccess: (id) => {
+ onSuccess: async (id, fileUrl) => {
formData.fileId = id
+ formData.fileUrl = fileUrl // 保存文件URL
message.success('文件上传成功')
+ // 通过fileId获取播放URL用于语音识别
+ await fetchAudioTextById(id)
onSuccess?.({ code: 0, data: id }, file)
},
onError: (error) => {
@@ -330,12 +339,51 @@ const handleCustomUpload = async (options) => {
}
}
+
+
+/**
+ * Fetch the transcript of an uploaded audio file and store it in formData.text.
+ *
+ * Resolves a play URL through MaterialService.getAudioPlayUrl, passes it to getVoiceText
+ * as `audio_url`, and keeps the `value` of the first result. Errors are logged to the
+ * console and never rethrown, so callers can await this without a try/catch.
+ *
+ * @param fileId - ID of the uploaded file; the call is a no-op when it is falsy.
+ * @returns {Promise<void>}
+ */
+const fetchAudioTextById = async (fileId) => {
+  if (!fileId) return
+  // Drop the transcript of any previously uploaded file first, so a failed or empty
+  // recognition cannot leave stale text paired with the new fileId at submit time.
+  formData.text = ''
+  try {
+    const res = await MaterialService.getAudioPlayUrl(fileId)
+    if (res.code !== 0 || !res.data) return
+    const results = await getVoiceText([{ audio_url: res.data }])
+    // Fall back to '' so formData.text never becomes undefined.
+    const text = results?.[0]?.value ?? ''
+    formData.text = text
+    if (text) {
+      message.success('音频文本获取成功')
+    }
+  } catch (error) {
+    console.error('获取音频文本失败:', error)
+  }
+}
+
+/**
+ * Fetch the transcript for an audio file URL and store it in formData.text.
+ *
+ * Passes extractRawUrl(fileUrl) to getVoiceText as `audio_url` and keeps the `value` of
+ * the first result. Errors are logged to the console and not rethrown.
+ *
+ * NOTE(review): no caller of this function and no definition or import of extractRawUrl
+ * appear in the visible hunks — confirm both exist; otherwise this is dead code that
+ * would throw a ReferenceError if it were ever called.
+ *
+ * @param fileUrl - URL of the audio file; the call is a no-op when it is falsy.
+ */
+const fetchAudioText = async (fileUrl) => {
+ if (!fileUrl) return
+ try {
+ // Aliyun speech recognition cannot access presigned URLs, so use the raw URL
+ const rawFileUrl = extractRawUrl(fileUrl)
+ const results = await getVoiceText([{ audio_url: rawFileUrl }])
+ if (results && results.length > 0) {
+ const text = results[0].value
+ formData.text = text
+ if (text) {
+ message.success('音频文本获取成功')
+ }
+ }
+ } catch (error) {
+ console.error('获取音频文本失败:', error)
+ }
+}
+
const handleFileListChange = (info) => {
- // 处理文件列表变化,避免直接修改导致 DOM 错误
const { fileList: newFileList } = info
-
- // 只更新文件列表,不直接修改文件项的状态
- // 让组件自己管理状态
if (newFileList) {
fileList.value = newFileList.filter(item => item.status !== 'removed')
}
@@ -363,7 +411,8 @@ const handleSubmit = async () => {
autoTranscribe: formData.autoTranscribe,
language: formData.language,
gender: formData.gender,
- note: formData.note
+ note: formData.note,
+ text: formData.text // 传入音频文本
}
: {
id: formData.id,
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
index 66fb1637ab..e1ae8a32bf 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
@@ -58,7 +58,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
sfRequest.setText(getOrDefault(request.getTranscriptionText(), config.getPreviewText()));
sfRequest.setAudio(AUDIO_MIME_TYPE + base64Audio);
- // 调用上传参考音频 API
String url = config.getBaseUrl() + config.getVoiceUploadUrl();
String requestBody = JSONUtil.toJsonStr(sfRequest);
log.debug("[SiliconFlowProvider][请求体]{}", requestBody);
@@ -123,7 +122,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
.build();
- // 调用文本转语音 API
String url = config.getBaseUrl() + config.getTtsUrl();
String requestBody = JSONUtil.toJsonStr(sfRequest);
log.debug("[SiliconFlowProvider][请求体]{}", requestBody);
@@ -142,7 +140,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
throw new RuntimeException("硅基流动文本转语音失败: " + errorBody);
}
- // 硅基流动直接返回二进制音频数据
byte[] audioBytes = response.bodyBytes();
String base64Audio = Base64.getEncoder().encodeToString(audioBytes);
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java
index d9add1609e..5322bf8f08 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/dto/SiliconFlowTtsRequest.java
@@ -36,8 +36,6 @@ public class SiliconFlowTtsRequest {
*/
private Float speed;
-
-
/**
* 响应格式(mp3, opus, wav, pcm)(API 参数名:response_format)
*/
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java
index cf072f9bf8..370965ad14 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java
@@ -32,8 +32,11 @@ public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderC
/**
* 默认采样率
+ * mp3: 32000, 44100 (默认 44100)
+ * opus: 48000
+ * wav/pcm: 8000, 16000, 24000, 32000, 44100 (默认 44100)
*/
- private Integer sampleRate = 24000;
+ private Integer sampleRate = 44100;
/**
* 默认音频格式
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
index 2b7db6aaad..4215ac9a74 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -144,34 +144,38 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.setLanguage(StrUtil.blankToDefault(createReqVO.getLanguage(), "zh-CN"))
.setGender(StrUtil.blankToDefault(createReqVO.getGender(), "female"))
.setNote(createReqVO.getNote())
- .setTranscription(null); // 初始为空,表示未识别
+ .setTranscription(createReqVO.getText()); // 使用前端传入的文本
voiceMapper.insert(voice);
// 4. 调用语音克隆服务,生成 voice_id
- try {
- log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]",
- voice.getId(), fileDO.getId(), createReqVO.getProviderType());
- String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
+ if (StrUtil.isNotBlank(createReqVO.getText())) {
+ try {
+ log.info("[createVoice][开始语音复刻,配音编号({}),文件ID({}),供应商({})]",
+ voice.getId(), fileDO.getId(), createReqVO.getProviderType());
+ String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
- VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
- String providerType = getProviderType(createReqVO.getProviderType(), provider);
- String model = getModelByProvider(providerType);
+ VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
+ String providerType = getProviderType(createReqVO.getProviderType(), provider);
+ String model = getModelByProvider(providerType);
- VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
- cloneRequest.setAudioUrl(fileAccessUrl);
- cloneRequest.setModel(model);
- cloneRequest.setPrefix("voice" + voice.getId());
- cloneRequest.setTranscriptionText(voice.getTranscription());
+ VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
+ cloneRequest.setAudioUrl(fileAccessUrl);
+ cloneRequest.setModel(model);
+ cloneRequest.setPrefix("voice" + voice.getId());
+ cloneRequest.setTranscriptionText(createReqVO.getText()); // 使用前端传入的文本
- VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
- String voiceId = cloneResult.getVoiceId();
+ VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
+ String voiceId = cloneResult.getVoiceId();
- voice.setVoiceId(voiceId);
- voiceMapper.updateById(voice);
+ voice.setVoiceId(voiceId);
+ voiceMapper.updateById(voice);
- log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId);
- } catch (Exception e) {
- log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
+ log.info("[createVoice][语音复刻成功,配音编号({}),voice_id({})]", voice.getId(), voiceId);
+ } catch (Exception e) {
+ log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
+ }
+ } else {
+ log.info("[createVoice][未提供文本,跳过语音复刻,配音编号({})]", voice.getId());
}
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java
index 4720d54634..476574c7b5 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/vo/AppTikUserVoiceCreateReqVO.java
@@ -3,6 +3,7 @@ package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.Size;
import lombok.Data;
/**
@@ -34,6 +35,10 @@ public class AppTikUserVoiceCreateReqVO {
@Schema(description = "备注", example = "这是一个测试配音")
private String note;
+ @Schema(description = "音频文本(用于语音复刻,前端通过音频识别获取)")
+ @Size(max = 4000, message = "音频文本不能超过 4000 个字符")
+ private String text;
+
@Schema(description = "供应商类型:cosyvoice-阿里云,siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
private String providerType;
From 5cee70413208b1df0dc3bf57be8558bf5283175f Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Mon, 2 Feb 2026 22:36:20 +0800
Subject: [PATCH 2/8] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
frontend/app/web-gold/package.json | 1 +
.../web-gold/src/components/VoiceSelector.vue | 100 ++++++++++++++++--
frontend/app/web-gold/src/main.js | 1 +
frontend/package.json | 1 +
.../service/TikUserVoiceServiceImpl.java | 21 ++--
5 files changed, 108 insertions(+), 16 deletions(-)
diff --git a/frontend/app/web-gold/package.json b/frontend/app/web-gold/package.json
index 53f3805328..0ba2288658 100644
--- a/frontend/app/web-gold/package.json
+++ b/frontend/app/web-gold/package.json
@@ -23,6 +23,7 @@
"@tailwindcss/vite": "^4.1.14",
"ai": "^6.0.39",
"ant-design-vue": "^4.2.6",
+ "aplayer": "^1.10.1",
"dayjs": "^1.11.18",
"markdown-it": "^14.1.0",
"path-to-regexp": "^6.3.0",
diff --git a/frontend/app/web-gold/src/components/VoiceSelector.vue b/frontend/app/web-gold/src/components/VoiceSelector.vue
index 2eb8e21b43..1117dcc239 100644
--- a/frontend/app/web-gold/src/components/VoiceSelector.vue
+++ b/frontend/app/web-gold/src/components/VoiceSelector.vue
@@ -27,19 +27,32 @@
试听
+
+
+
diff --git a/frontend/app/web-gold/src/main.js b/frontend/app/web-gold/src/main.js
index 95b3e2706a..26b7c711d1 100644
--- a/frontend/app/web-gold/src/main.js
+++ b/frontend/app/web-gold/src/main.js
@@ -3,6 +3,7 @@ import { createPinia } from 'pinia'
import Antd from 'ant-design-vue'
import 'normalize.css'
import 'ant-design-vue/dist/reset.css'
+import 'aplayer/dist/APlayer.min.css'
import piniaPluginPersistedstate from 'pinia-plugin-persistedstate';
import 'dayjs/locale/zh-cn';
diff --git a/frontend/package.json b/frontend/package.json
index d5ae75eadd..ed300c22a2 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -12,6 +12,7 @@
"description": "",
"dependencies": {
"@types/node": "^25.0.6",
+ "aplayer": "^1.10.1",
"axios": "^1.12.2",
"github-markdown-css": "^5.8.1",
"localforage": "^1.10.0",
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
index 4215ac9a74..a548745307 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -499,9 +499,9 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
log.info("[previewVoice][试听,voiceConfigId={}, voiceId={}, userId={}]",
voiceConfigId, reqVO.getVoiceId(), userId);
- String voiceId = null;
- String fileUrl = null;
- String referenceText = null;
+ String voiceId;
+ String fileUrl;
+ String referenceText;
// 1. 通过语音URL合成
if (StrUtil.isNotBlank(reqVO.getFileUrl()) && StrUtil.isNotBlank(reqVO.getTranscriptionText())) {
@@ -510,6 +510,7 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
? reqVO.getFileUrl()
: fileApi.presignGetUrl(rawFileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
referenceText = reqVO.getTranscriptionText();
+ voiceId = null;
}
// 2. 用户配音
else if (voiceConfigId != null) {
@@ -518,8 +519,10 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
throw exception(VOICE_NOT_EXISTS, "配音不存在");
}
- if (StrUtil.isNotBlank(voice.getVoiceId())) {
- voiceId = voice.getVoiceId();
+ voiceId = voice.getVoiceId();
+ if (StrUtil.isNotBlank(voiceId)) {
+ fileUrl = null;
+ referenceText = null;
} else {
FileDO fileDO = fileMapper.selectById(voice.getFileId());
if (fileDO == null) {
@@ -538,14 +541,14 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
if (StrUtil.isBlank(voiceId)) {
throw exception(VOICE_NOT_EXISTS, "系统配音音色ID不能为空");
}
+ fileUrl = null;
+ referenceText = null;
}
- // 统一处理:使用前端传入的 inputText,否则使用默认试听文本
String finalText = StrUtil.blankToDefault(reqVO.getInputText(), getPreviewText());
-
String instruction = reqVO.getInstruction();
- Float speechRate = reqVO.getSpeechRate() != null ? reqVO.getSpeechRate() : 1.0f;
- Float volume = reqVO.getVolume() != null ? reqVO.getVolume() : 0f;
+ Float speechRate = ObjectUtil.defaultIfNull(reqVO.getSpeechRate(), 1.0f);
+ Float volume = ObjectUtil.defaultIfNull(reqVO.getVolume(), 0f);
String audioFormat = StrUtil.blankToDefault(reqVO.getAudioFormat(), "mp3");
// 缓存
From 42567c457b0603dc2366d45ad263f4970639b193 Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Mon, 2 Feb 2026 22:42:15 +0800
Subject: [PATCH 3/8] =?UTF-8?q?=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../web-gold/src/components/VoiceSelector.vue | 36 +++++++++++++++++--
1 file changed, 34 insertions(+), 2 deletions(-)
diff --git a/frontend/app/web-gold/src/components/VoiceSelector.vue b/frontend/app/web-gold/src/components/VoiceSelector.vue
index 1117dcc239..02cec98c4b 100644
--- a/frontend/app/web-gold/src/components/VoiceSelector.vue
+++ b/frontend/app/web-gold/src/components/VoiceSelector.vue
@@ -49,8 +49,40 @@ const playerContainer = ref(null)
const audioUrl = ref('')
const currentVoiceName = ref('')
-// 默认封面图片(Base64 SVG)
-const defaultCover = 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTAwIiBoZWlnaHQ9IjEwMCIgdmlld0JveD0iMCAwIDEwMCAxMDAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+PHJlY3Qgd2lkdGg9IjEwMCIgaGVpZ2h0PSIxMDAiIGZpbGw9IiMzY4MmY2IiBmaWxsLW9wYWNpdHk9IjAuMSIvPjxwL3N2Zz4='
+// 默认封面图片(音频波形图标)
+const defaultCover = `data:image/svg+xml;base64,${btoa(`
+
+`.trim())}`
// 使用TTS Hook
const {
From 409e976209a33ebc2b8894248211c75aefd3a526 Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Mon, 2 Feb 2026 23:16:38 +0800
Subject: [PATCH 4/8] =?UTF-8?q?=E5=8A=9F=E8=83=BD=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../web-gold/src/components/VoiceSelector.vue | 117 ++++++++++--------
.../web-gold/src/views/kling/IdentifyFace.vue | 14 +--
.../tik/voice/client/SiliconFlowProvider.java | 2 +-
.../config/SiliconFlowProviderConfig.java | 4 +-
.../service/TikUserVoiceServiceImpl.java | 1 +
5 files changed, 76 insertions(+), 62 deletions(-)
diff --git a/frontend/app/web-gold/src/components/VoiceSelector.vue b/frontend/app/web-gold/src/components/VoiceSelector.vue
index 02cec98c4b..f872258a8a 100644
--- a/frontend/app/web-gold/src/components/VoiceSelector.vue
+++ b/frontend/app/web-gold/src/components/VoiceSelector.vue
@@ -1,6 +1,6 @@
-
+
还没有配音,可先在"配音管理"中上传
@@ -19,31 +19,52 @@
size="small"
:disabled="!selectedVoiceId"
:loading="previewLoadingVoiceId === selectedVoiceId"
- @click="handlePreviewCurrentVoice"
+ @click="handleSynthesize"
>
- 试听
+ 合成
+
+
+
+ 下载音频
+
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
index e1ae8a32bf..8070b67f11 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
@@ -66,7 +66,7 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
.header("Authorization", "Bearer " + config.getApiKey())
.header("Content-Type", MediaType.APPLICATION_JSON_VALUE)
.body(requestBody)
- .timeout((int) config.getConnectTimeout().toMillis())
+ .timeout((int) config.getReadTimeout().toMillis())
.execute();
String responseBody = response.body();
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java
index 370965ad14..2c4fc0710a 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/config/SiliconFlowProviderConfig.java
@@ -64,9 +64,9 @@ public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderC
private Duration connectTimeout = Duration.ofSeconds(10);
/**
- * 读取超时时间(3分钟,提升语音合成成功率)
+ * 读取超时时间(5分钟,提升语音合成成功率)
*/
- private Duration readTimeout = Duration.ofSeconds(180);
+ private Duration readTimeout = Duration.ofSeconds(300);
/**
* 检查是否可用(有 API Key 即可用)
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
index a548745307..5dc3fb6557 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/TikUserVoiceServiceImpl.java
@@ -1,6 +1,7 @@
package cn.iocoder.yudao.module.tik.voice.service;
import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONArray;
From f8e40c039db3afc1185054acc430308088d10b06 Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Tue, 3 Feb 2026 02:00:12 +0800
Subject: [PATCH 5/8] =?UTF-8?q?feat:=E4=BC=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../yudao/module/tik/voice/client/SiliconFlowProvider.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
index 8070b67f11..18b0412d1f 100644
--- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
+++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/client/SiliconFlowProvider.java
@@ -115,7 +115,7 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
try {
SiliconFlowTtsRequest sfRequest = SiliconFlowTtsRequest.builder()
- .model(getOrDefault(request.getModel(), config.getDefaultModel()))
+ .model(getOrDefault(request.getModel(), getOrDefault(config.getDefaultModel(), "IndexTeam/IndexTTS-2")))
.input(request.getText())
.voice(request.getVoiceId())
.speed(request.getSpeechRate() != null ? request.getSpeechRate() : 1.0f)
From 0e1b6fe64396d3f79550d3d67d1e3a7ff18c2a2a Mon Sep 17 00:00:00 2001
From: sion123 <450702724@qq.com>
Date: Wed, 4 Feb 2026 01:18:16 +0800
Subject: [PATCH 6/8] =?UTF-8?q?feat:=20=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
frontend/app/web-gold/src/api/kling.js | 141 ++---
.../src/components/PipelineProgress.vue | 112 ++++
.../app/web-gold/src/composables/useTTS.js | 159 +++--
.../app/web-gold/src/views/dh/VoiceCopy.vue | 100 ++-
.../web-gold/src/views/kling/IdentifyFace.vue | 131 ++--
.../src/views/kling/hooks/pipeline/states.ts | 124 ++++
.../src/views/kling/hooks/pipeline/types.ts | 126 ++++
.../kling/hooks/pipeline/useSimplePipeline.ts | 287 +++++++++
.../kling/hooks/useDigitalHumanGeneration.ts | 147 ++---
.../kling/hooks/useIdentifyFaceController.ts | 567 ++++++++----------
.../views/kling/hooks/useVoiceGeneration.ts | 199 ++----
.../src/views/kling/types/identify-face.ts | 51 +-
.../file/service/TikUserFileServiceImpl.java | 12 +-
.../tik/file/vo/app/AppTikUserFileRespVO.java | 5 +-
.../module/tik/kling/client/KlingClient.java | 161 ++---
.../dal/dataobject/TikDigitalHumanTaskDO.java | 2 +
.../dal/mysql/TikDigitalHumanTaskMapper.java | 18 +
.../job/DigitalHumanTaskStatusSyncJob.java | 4 +-
.../service/LatentsyncPollingService.java | 134 +++--
19 files changed, 1472 insertions(+), 1008 deletions(-)
create mode 100644 frontend/app/web-gold/src/components/PipelineProgress.vue
create mode 100644 frontend/app/web-gold/src/views/kling/hooks/pipeline/states.ts
create mode 100644 frontend/app/web-gold/src/views/kling/hooks/pipeline/types.ts
create mode 100644 frontend/app/web-gold/src/views/kling/hooks/pipeline/useSimplePipeline.ts
diff --git a/frontend/app/web-gold/src/api/kling.js b/frontend/app/web-gold/src/api/kling.js
index 7a6d32059c..e2dedc2673 100644
--- a/frontend/app/web-gold/src/api/kling.js
+++ b/frontend/app/web-gold/src/api/kling.js
@@ -2,18 +2,66 @@
* 可灵数字人 API
*/
import request from './http'
-import { message } from "ant-design-vue"
import { MaterialService } from './material'
-/**
- * 显示加载提示
- */
-const showLoading = (text) => message.loading(text, 0)
+// ========== 辅助函数 ==========
/**
- * 销毁加载提示
+ * Best-effort extraction of a cover image from a video file.
+ *
+ * Dynamically imports extractVideoCover from '@/utils/video-cover' and calls it with
+ * maxWidth 800 and quality 0.8.
+ *
+ * @param file - Video file handed to extractVideoCover.
+ * @returns The `base64` property of the extracted cover, or null when the import or
+ * the extraction throws.
*/
-const hideLoading = () => message.destroy()
+async function extractVideoCoverOptional(file) {
+ try {
+ const { extractVideoCover } = await import('@/utils/video-cover')
+ const cover = await extractVideoCover(file, {
+ maxWidth: 800,
+ quality: 0.8
+ })
+ return cover.base64
+ } catch { // the cover is optional: any failure is reported as null
+ return null
+ }
+}
+
+/**
+ * Run face identification on a video URL and return a flattened result.
+ *
+ * Calls identifyFace({ video_url }) and reads the first entry of data.data.face_data.
+ *
+ * @param videoUrl - URL sent to identifyFace as `video_url`.
+ * @returns {Promise<Object>} `{ sessionId, faceId, startTime, endTime }`; faceId falls
+ * back to null and both times to 0 when the first face entry or the field is missing
+ * or falsy.
+ * @throws {Error} When the response code is not 0 (message: response msg or '识别失败').
+ */
+async function performFaceIdentification(videoUrl) {
+ const identifyRes = await identifyFace({ video_url: videoUrl })
+ if (identifyRes.code !== 0) {
+ throw new Error(identifyRes.msg || '识别失败')
+ }
+
+ const faceData = identifyRes.data.data?.face_data?.[0]
+ return {
+ sessionId: identifyRes.data.sessionId,
+ faceId: faceData?.face_id || null,
+ startTime: faceData?.start_time || 0,
+ endTime: faceData?.end_time || 0
+ }
+}
+
+/**
+ * Build the `{ success: true, data }` object returned by the identify helpers.
+ *
+ * @param fileId - Stored as data.fileId.
+ * @param videoUrl - Stored as data.videoUrl.
+ * @param identifyData - Object providing sessionId, faceId, startTime and endTime.
+ * @param {boolean} [isUploadedFile=false] - When true, startTime and endTime are
+ * multiplied by 1000 and rounded; otherwise they are passed through unchanged.
+ * @returns {Object} The response object.
+ *
+ * NOTE(review): both callers take the times from the same identifyFace response, yet
+ * only uploadAndIdentifyVideo passes true — presumably a seconds-to-milliseconds
+ * conversion; confirm which unit consumers expect on each path.
+ */
+function buildIdentifyResponse(fileId, videoUrl, identifyData, isUploadedFile = false) {
+ return {
+ success: true,
+ data: {
+ fileId,
+ videoUrl,
+ sessionId: identifyData.sessionId,
+ faceId: identifyData.faceId,
+ startTime: isUploadedFile
+ ? Math.round(identifyData.startTime * 1000)
+ : identifyData.startTime,
+ endTime: isUploadedFile
+ ? Math.round(identifyData.endTime * 1000)
+ : identifyData.endTime
+ }
+ }
+}
+
+// ========== API 方法 ==========
export function identifyFace(data) {
return request({
@@ -38,93 +86,46 @@ export function getLipSyncTask(taskId) {
})
}
+/**
+ * Identify the face in an already-uploaded video.
+ *
+ * Resolves a play URL via MaterialService.getVideoPlayUrl(videoFile.fileId), runs
+ * performFaceIdentification on it and returns buildIdentifyResponse with
+ * isUploadedFile = false, so the times are passed through unscaled.
+ *
+ * NOTE(review): the URL lookup uses videoFile.fileId while the response carries
+ * videoFile.id — presumably two different identifiers; confirm against the caller.
+ *
+ * @param videoFile - Object providing `fileId` and `id`.
+ * @returns {Promise<Object>} `{ success: true, data }`.
+ * @throws {Error} When the play URL response has a non-zero code or empty data, or
+ * when identification fails.
+ */
export async function identifyUploadedVideo(videoFile) {
try {
- showLoading('正在识别视频中的人脸...')
- const identifyRes = await identifyFace({ video_url: videoFile.fileUrl })
- hideLoading()
-
- if (identifyRes.code !== 0) {
- throw new Error(identifyRes.msg || '识别失败')
+ const urlRes = await MaterialService.getVideoPlayUrl(videoFile.fileId)
+ if (urlRes.code !== 0 || !urlRes.data) {
+ throw new Error(urlRes.msg || '获取播放链接失败')
}
- return {
- success: true,
- data: {
- fileId: videoFile.id,
- videoUrl: videoFile.fileUrl,
- sessionId: identifyRes.data.sessionId,
- faceId: identifyRes.data.data.face_data[0].face_id || null,
- startTime: identifyRes.data.data.face_data[0].start_time || 0,
- endTime: identifyRes.data.data.face_data[0].end_time || 0
- }
- }
+ const identifyData = await performFaceIdentification(urlRes.data)
+ return buildIdentifyResponse(videoFile.id, urlRes.data, identifyData, false)
} catch (error) {
- hideLoading()
throw error
}
}
+/**
+ * Upload a video, then identify the face in it.
+ *
+ * Steps: extract an optional cover (null on failure), upload through
+ * MaterialService.uploadFile, resolve a play URL for the returned file ID, run
+ * performFaceIdentification, and return buildIdentifyResponse with
+ * isUploadedFile = true (times multiplied by 1000 and rounded).
+ *
+ * NOTE(review): unlike identifyUploadedVideo, the play-URL check here does not test
+ * for an empty urlRes.data — confirm whether that difference is intended.
+ *
+ * @param file - Video file to upload.
+ * @returns {Promise<Object>} `{ success: true, data }`.
+ * @throws {Error} When the upload, the play-URL lookup or the identification fails.
+ */
export async function uploadAndIdentifyVideo(file) {
+ const coverBase64 = await extractVideoCoverOptional(file)
+
try {
- showLoading('正在提取视频封面...')
- let coverBase64 = null
- try {
- const { extractVideoCover } = await import('@/utils/video-cover')
- const cover = await extractVideoCover(file, {
- maxWidth: 800,
- quality: 0.8
- })
- coverBase64 = cover.base64
- } catch (coverError) {
- // 封面提取失败不影响主流程
- }
- hideLoading()
-
- showLoading('正在上传视频...')
-
- // 使用useUpload Hook(注意:这里需要在组件中使用,这里先用MaterialService)
- // TODO: 在组件中集成useUpload Hook
const uploadRes = await MaterialService.uploadFile(file, 'video', coverBase64, null, null)
- hideLoading()
-
if (uploadRes.code !== 0) {
throw new Error(uploadRes.msg || '上传失败')
}
const fileId = uploadRes.data
- showLoading('正在生成播放链接...')
const urlRes = await MaterialService.getVideoPlayUrl(fileId)
- hideLoading()
-
if (urlRes.code !== 0) {
throw new Error(urlRes.msg || '获取播放链接失败')
}
- const videoUrl = urlRes.data
-
- showLoading('正在识别视频中的人脸...')
- const identifyRes = await identifyFace({ video_url: videoUrl })
- hideLoading()
-
- if (identifyRes.code !== 0) {
- throw new Error(identifyRes.msg || '识别失败')
- }
-
- return {
- success: true,
- data: {
- fileId,
- videoUrl,
- sessionId: identifyRes.data.sessionId,
- faceId: identifyRes.data.data.face_data[0].face_id || null,
- startTime: identifyRes.data.data.face_data[0].start_time || 0,
- endTime: identifyRes.data.data.face_data[0].end_time || 0
- }
- }
+ const identifyData = await performFaceIdentification(urlRes.data)
+ return buildIdentifyResponse(fileId, urlRes.data, identifyData, true)
} catch (error) {
- hideLoading()
throw error
}
}
+
diff --git a/frontend/app/web-gold/src/components/PipelineProgress.vue b/frontend/app/web-gold/src/components/PipelineProgress.vue
new file mode 100644
index 0000000000..28fe6c1a74
--- /dev/null
+++ b/frontend/app/web-gold/src/components/PipelineProgress.vue
@@ -0,0 +1,112 @@
+
+
+
+
+
+
+
diff --git a/frontend/app/web-gold/src/composables/useTTS.js b/frontend/app/web-gold/src/composables/useTTS.js
index 34a9b02d1c..4a01745d00 100644
--- a/frontend/app/web-gold/src/composables/useTTS.js
+++ b/frontend/app/web-gold/src/composables/useTTS.js
@@ -7,72 +7,97 @@ import { message } from 'ant-design-vue'
import { VoiceService } from '@/api/voice'
import { normalizeProviderType, VOICE_PROVIDER_TYPES } from '@/config/voiceConfig'
-// 兼容旧代码的导出
+// ========== 常量 ==========
+
+/** 兼容旧代码的导出 */
const TTS_PROVIDERS = VOICE_PROVIDER_TYPES
-const DEFAULT_CONFIG = {
- apiEndpoint: '/api/tik/voice/tts',
- audioFormat: 'mp3',
- supportedFormats: ['mp3', 'wav']
+/** 默认配置 */
+const DEFAULT_CONFIG = {
+ apiEndpoint: '/api/tik/voice/tts',
+ audioFormat: 'mp3',
+ supportedFormats: ['mp3', 'wav']
}
+/** 最大预览缓存数量 */
+const MAX_PREVIEW_CACHE_SIZE = 50
+
+// ========== 类型定义 ==========
+
+/**
+ * @typedef {Object} AudioData
+ * @property {Blob} blob - 音频 Blob
+ * @property {string} objectUrl - 对象 URL
+ * @property {string} format - 音频格式
+ */
+
export function useTTS(options = {}) {
const {
provider = VOICE_PROVIDER_TYPES.SILICONFLOW,
customConfig = {}
} = options
- // 状态管理
+ // 状态管理(移到函数内部,避免模块级状态污染)
const previewAudioCache = new Map()
- const MAX_PREVIEW_CACHE_SIZE = 50
const previewLoadingVoiceId = ref(null)
const playingPreviewVoiceId = ref(null)
const ttsText = ref('')
const speechRate = ref(1.0)
- // 音频实例
+ // 音频实例(移到函数内部)
let previewAudio = null
let previewObjectUrl = ''
- // 获取当前供应商配置
- const getProviderConfig = () => {
+ // ========== 辅助函数 ==========
+
+ function getProviderConfig() {
return DEFAULT_CONFIG
}
/**
* 播放音频预览
- * @param {string} url 音频URL
- * @param {Object} options 播放选项
+ * @param {string} url - 音频 URL
+ * @param {Object} playOptions - 播放选项
+ * @param {boolean} [playOptions.revokeOnEnd=false] - 播放结束后是否释放 URL
+ * @param {Function} [playOptions.onEnded] - 播放结束回调
*/
- const playAudioPreview = (url, options = {}) => {
- if (!url) return message.warning('暂无可试听的音频')
+ function playAudioPreview(url, playOptions = {}) {
+ if (!url) {
+ message.warning('暂无可试听的音频')
+ return
+ }
+ // 停止当前播放
try {
previewAudio?.pause?.()
previewAudio = null
- } catch (error) {
+ } catch {
+ // 忽略停止播放的错误
}
const audio = new Audio(url)
- const cleanup = () => {
- if (options.revokeOnEnd && url.startsWith('blob:')) {
+
+ function cleanup() {
+ if (playOptions.revokeOnEnd && url.startsWith('blob:')) {
URL.revokeObjectURL(url)
- previewObjectUrl === url && (previewObjectUrl = '')
+ if (previewObjectUrl === url) {
+ previewObjectUrl = ''
+ }
}
previewAudio = null
- options.onEnded && options.onEnded()
+ playOptions.onEnded?.()
}
audio.play()
- .then(() => {
+ .then(function() {
previewAudio = audio
audio.onended = cleanup
- audio.onerror = () => {
+ audio.onerror = function() {
cleanup()
message.error('播放失败')
}
})
- .catch(err => {
+ .catch(function() {
cleanup()
message.error('播放失败')
})
@@ -80,10 +105,10 @@ export function useTTS(options = {}) {
/**
* 生成预览缓存键
- * @param {Object} voice 音色对象
+ * @param {Object} voice - 音色对象
* @returns {string} 缓存键
*/
- const generatePreviewCacheKey = (voice) => {
+ function generatePreviewCacheKey(voice) {
const voiceId = voice.voiceId || voice.rawId || voice.id
const text = ttsText.value.trim()
const rate = speechRate.value
@@ -92,12 +117,12 @@ export function useTTS(options = {}) {
/**
* 解码并缓存Base64音频
- * @param {string} audioBase64 Base64编码的音频数据
- * @param {string} format 音频格式
- * @param {string} cacheKey 缓存键
+ * @param {string} audioBase64 - Base64 编码的音频数据
+ * @param {string} [format='mp3'] - 音频格式
+ * @param {string} cacheKey - 缓存键
* @returns {Promise