diff --git a/frontend/app/web-gold/src/views/kling/IdentifyFace.vue b/frontend/app/web-gold/src/views/kling/IdentifyFace.vue index d502d347e8..ec4c06ae34 100644 --- a/frontend/app/web-gold/src/views/kling/IdentifyFace.vue +++ b/frontend/app/web-gold/src/views/kling/IdentifyFace.vue @@ -137,55 +137,30 @@ - -
-

配音生成与校验

- - -
- - {{ audioState.generating ? '生成中...' : '生成配音(用于校验时长)' }} - -
- - -
-
-

生成的配音

-
- 音频时长: - {{ audioDurationSec }} 秒 -
-
- 人脸区间: - {{ faceDurationSec }} 秒 -
-
- 校验结果: - - {{ validationPassed ? '✅ 通过' : '❌ 不通过(音频时长不能超过人脸时长)' }} - -
+ +
+ +
+
+ 配音 + {{ audioDurationSec }}秒
-
-
@@ -205,12 +180,26 @@
+ + {{ isPipelineBusy ? '处理中...' : '生成配音并验证' }} + + + + {{ isPipelineBusy ? '处理中...' : '生成数字人视频' }} @@ -250,19 +239,16 @@ const dragOver = ref(false) // Controller 内部直接创建和管理两个子 Hook const controller = useIdentifyFaceController() - // 解构 controller 以简化模板调用 const { // 语音生成相关 ttsText, speechRate, audioState, - canGenerateAudio, generateAudio, // 数字人生成相关 videoState, - identifyState, getVideoPreviewUrl, // 计算属性 @@ -276,7 +262,7 @@ const { audioUrl, validationPassed, - // Pipeline 状态 + // Pipeline 状态(单一状态源) pipelineState, isPipelineBusy, isPipelineReady, @@ -370,20 +356,15 @@ onMounted(async () => { font-weight: 600; } -.card-content h4, -.audio-info h4 { +.card-content h4 { color: var(--text-primary); font-size: 14px; margin-bottom: 12px; } -.card-content p, -.duration-label span:first-child { +.card-content p { color: var(--text-secondary); font-size: 13px; -} - -.card-content p { margin: 0; } @@ -401,24 +382,6 @@ onMounted(async () => { } } -.text-hint { - display: flex; - align-items: center; - gap: 8px; - margin-top: 12px; - padding: 12px 16px; - background: rgba(var(--color-primary), 0.1); - border: 1px solid rgba(var(--color-primary), 0.3); - border-radius: 8px; - font-size: 13px; - color: var(--text-secondary); -} - -.hint-icon { - font-size: 16px; -} - -/* ========== 控制面板 ========== */ .control-group { margin-bottom: 16px; } @@ -635,175 +598,76 @@ onMounted(async () => { border-radius: 8px; } -/* ========== 验证结果 ========== */ -.validation-result { - padding: 16px; - background: var(--bg-primary); - border-radius: 8px; - border: 1px solid var(--border-light); +/* ========== 音频区域 ========== */ +.audio-section { + margin-bottom: 24px; } -.validation-result.validation-passed { - border-color: var(--color-success); - background: rgba(var(--color-success), 0.05); +.audio-generated { + display: flex; + flex-direction: column; + gap: 12px; } -.validation-result.validation-failed { - border-color: var(--color-error); - background: rgba(var(--color-error), 0.05); -} - -.validation-status { +.audio-header { display: flex; align-items: center; - gap: 8px; - margin-bottom: 12px; - padding: 12px; - background: var(--bg-secondary); - border-radius: 6px; + justify-content: space-between; } -.status-icon { - font-size: 18px; -} - -.status-text { - color: var(--text-primary); +.audio-title { font-size: 14px; font-weight: 600; -} - -/* ========== 时长对比进度条 ========== */ -.duration-comparison { - margin-bottom: 16px; - padding: 12px; - background: var(--bg-secondary); - border-radius: 6px; -} - -.duration-bar { - margin-bottom: 12px; - - &:last-child { - margin-bottom: 0; - } -} - -.duration-label { - display: flex; - justify-content: space-between; - align-items: center; - margin-bottom: 8px; - font-size: 13px; -} - -.duration-value { color: var(--text-primary); - font-weight: 600; - font-size: 13px; - padding: 4px 8px; - background: var(--bg-primary); - border-radius: 4px; } -.progress-bar { - height: 8px; - background: var(--bg-primary); - border-radius: 4px; - overflow: hidden; -} - -.progress-fill { - height: 100%; - border-radius: 4px; - transition: width 0.3s; -} - -.audio-bar .progress-fill { - background: var(--color-primary); -} - -.video-bar .progress-fill.success { - background: var(--color-success); -} - -.video-bar .progress-fill.error { - background: var(--color-error); -} - -/* ========== 错误提示 ========== */ -.validation-error { - padding: 12px; - background: var(--bg-secondary); - border: 1px solid var(--border-light); - border-radius: 6px; -} - -.error-message { - color: var(--color-error); - font-size: 13px; - margin: 0 0 12px 0; -} - -.quick-actions { - display: flex; - gap: 8px; -} - -/* ========== 音频生成 ========== */ -.audio-generation-section { - margin-bottom: 24px; - padding: 16px; - background: var(--bg-secondary); - border-radius: 8px; - border: 1px solid var(--border-light); -} - -.generate-audio-row { - margin-bottom: 16px; -} - -.audio-preview { - padding: 16px; - background: var(--bg-primary); - border-radius: 8px; -} - -.duration-info { - display: flex; - justify-content: space-between; - margin-bottom: 8px; - font-size: 13px; -} - -.duration-info .label { +.audio-duration { + font-size: 12px; color: var(--text-secondary); } -.duration-info .value { - color: var(--text-primary); - font-weight: 600; -} - -.duration-info.validation-passed .value { - color: var(--color-success); -} - -.duration-info.validation-failed .value { - color: var(--color-error); -} - -.audio-player { - margin: 16px 0; -} - -.audio-element { +.audio-player-wrapper { width: 100%; } -.regenerate-row { +.audio-player { + width: 100%; + height: 36px; +} + +.validation-warning { + display: flex; + align-items: flex-start; + gap: 8px; + padding: 10px 12px; + background: rgba(var(--color-warning), 0.1); + border: 1px solid rgba(var(--color-warning), 0.3); + border-radius: 6px; + font-size: 13px; +} + +.warning-icon { + flex-shrink: 0; + font-size: 14px; +} + +.warning-text { + color: var(--text-secondary); + line-height: 1.4; +} + +.audio-prompt { text-align: center; - margin-top: 12px; + padding: 20px; + background: var(--bg-secondary); + border-radius: 8px; + border: 1px dashed var(--border-light); +} + +.audio-prompt p { + margin: 0 0 16px 0; + font-size: 14px; + color: var(--text-secondary); } /* ========== 操作按钮 ========== */ @@ -823,18 +687,6 @@ onMounted(async () => { border-radius: 8px; } -.generate-hint { - display: flex; - align-items: center; - gap: 8px; - padding: 12px 16px; - background: rgba(var(--color-warning), 0.1); - border: 1px solid rgba(var(--color-warning), 0.3); - border-radius: 6px; - font-size: 13px; - color: var(--color-warning); -} - /* ========== 响应式 ========== */ @media (max-width: 1024px) { .kling-content { diff --git a/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts b/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts index e875313cef..dd87b331ed 100644 --- a/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts +++ b/frontend/app/web-gold/src/views/kling/hooks/useDigitalHumanGeneration.ts @@ -1,19 +1,21 @@ /** * @fileoverview useDigitalHumanGeneration Hook - 数字人生成逻辑 + * + * 重构后:不管理识别状态,只提供数据和操作方法 + * 状态由 Pipeline 统一管理 */ import { ref, computed } from 'vue' import { message } from 'ant-design-vue' import type { - UseDigitalHumanGeneration, VideoState, - IdentifyState, + IdentifyResult, Video, } from '../types/identify-face' import { identifyUploadedVideo } from '@/api/kling' import { useUpload } from '@/composables/useUpload' -export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { +export function useDigitalHumanGeneration() { // ========== 状态 ========== const videoState = ref({ uploadedVideo: '', @@ -25,9 +27,8 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { selectorVisible: false, }) - const identifyState = ref({ - identifying: false, - identified: false, + // 识别结果数据(不含状态标志) + const identifyResult = ref({ sessionId: '', faceId: '', faceStartTime: 0, @@ -39,7 +40,15 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { // ========== 计算属性 ========== const faceDuration = computed(function() { - return identifyState.value.faceEndTime - identifyState.value.faceStartTime + return identifyResult.value.faceEndTime - identifyResult.value.faceStartTime + }) + + const hasVideo = computed(function() { + return !!videoState.value.uploadedVideo || !!videoState.value.selectedVideo + }) + + const isIdentified = computed(function() { + return !!identifyResult.value.sessionId }) // ========== 方法 ========== @@ -55,7 +64,7 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { videoState.value.selectedVideo = null videoState.value.previewVideoUrl = '' videoState.value.videoSource = 'upload' - resetIdentifyState() + resetIdentifyResult() } async function handleVideoSelect(video: Video): Promise { @@ -64,67 +73,65 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { videoState.value.videoFile = null videoState.value.videoSource = 'select' videoState.value.selectorVisible = false - resetIdentifyState() - identifyState.value.videoFileId = video.fileId + resetIdentifyResult() + identifyResult.value.videoFileId = video.fileId } - async function performFaceRecognition(): Promise { + /** + * 执行人脸识别 + * 返回识别结果供 Pipeline 使用 + */ + async function performFaceRecognition(): Promise { const hasUploadFile = videoState.value.videoFile const hasSelectedVideo = videoState.value.selectedVideo - if (!hasUploadFile && !hasSelectedVideo) return + if (!hasUploadFile && !hasSelectedVideo) { + throw new Error('请先选择视频') + } - identifyState.value.identifying = true - - try { - if (hasSelectedVideo) { - const res = await identifyUploadedVideo(hasSelectedVideo) as { success: boolean; data: { sessionId: string; faceId: string | null; startTime: number; endTime: number } } - identifyState.value.videoFileId = hasSelectedVideo.fileId - - identifyState.value.sessionId = res.data.sessionId - identifyState.value.faceId = res.data.faceId || '' - identifyState.value.faceStartTime = res.data.startTime || 0 - identifyState.value.faceEndTime = res.data.endTime || 0 - } else { - const file = hasUploadFile! - let coverBase64 = null - try { - const { extractVideoCover } = await import('@/utils/video-cover') - const cover = await extractVideoCover(file, { maxWidth: 800, quality: 0.8 }) - coverBase64 = cover.base64 - } catch { - // 封面提取失败不影响主流程 - } - - const fileId = await upload(file, { - fileCategory: 'video', - groupId: null, - coverBase64, - onStart: function() {}, - onProgress: function() {}, - onSuccess: function() {}, - onError: function(err: Error) { - message.error(err.message || '上传失败') - } - }) - - identifyState.value.videoFileId = fileId - identifyState.value.sessionId = '' - identifyState.value.faceId = '' - identifyState.value.faceStartTime = 0 - identifyState.value.faceEndTime = 0 + if (hasSelectedVideo) { + const res = await identifyUploadedVideo(hasSelectedVideo) as { + success: boolean; + data: { sessionId: string; faceId: string | null; startTime: number; endTime: number } + } + identifyResult.value.videoFileId = hasSelectedVideo.fileId + identifyResult.value.sessionId = res.data.sessionId + identifyResult.value.faceId = res.data.faceId || '' + identifyResult.value.faceStartTime = res.data.startTime || 0 + identifyResult.value.faceEndTime = res.data.endTime || 0 + } else { + const file = hasUploadFile! + let coverBase64 = null + try { + const { extractVideoCover } = await import('@/utils/video-cover') + const cover = await extractVideoCover(file, { maxWidth: 800, quality: 0.8 }) + coverBase64 = cover.base64 + } catch { + // 封面提取失败不影响主流程 } - identifyState.value.identified = true + const fileId = await upload(file, { + fileCategory: 'video', + groupId: null, + coverBase64, + onStart: function() {}, + onProgress: function() {}, + onSuccess: function() {}, + onError: function(err: Error) { + message.error(err.message || '上传失败') + } + }) - // 识别完成,不显示提示信息 - } catch (error: unknown) { - const err = error as Error - message.error(err.message || '识别失败') - throw error - } finally { - identifyState.value.identifying = false + identifyResult.value.videoFileId = fileId + // 上传后需要再调用识别接口获取人脸信息 + // 暂时清空,等待后续识别 + identifyResult.value.sessionId = '' + identifyResult.value.faceId = '' + identifyResult.value.faceStartTime = 0 + identifyResult.value.faceEndTime = 0 } + + return { ...identifyResult.value } } function resetVideoState(): void { @@ -135,7 +142,7 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { videoState.value.videoSource = null videoState.value.previewVideoUrl = '' videoState.value.selectorVisible = false - resetIdentifyState() + resetIdentifyResult() } function getVideoPreviewUrl(video: Video): string { @@ -149,22 +156,23 @@ export function useDigitalHumanGeneration(): UseDigitalHumanGeneration { return 'data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjExMCIgdmlld0JveD0iMCAwIDIwMCAxMTAiIGZpbGw9Im5vbmUiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyI+CjxyZWN0IHdpZHRoPSIyMDAiIGhlaWdodD0iMTEwIiBmaWxsPSIjMzc0MTUxIi8+CjxwYXRoIGQ9Ik04NSA0NUwxMTUgNjVMMTA1IDg1TDc1IDc1TDg1IDQ1WiIgZmlsbD0iIzU3MjY1MSIvPgo8L3N2Zz4K' } - function resetIdentifyState(): void { - identifyState.value.identified = false - identifyState.value.sessionId = '' - identifyState.value.faceId = '' - identifyState.value.videoFileId = null + function resetIdentifyResult(): void { + identifyResult.value.sessionId = '' + identifyResult.value.faceId = '' + identifyResult.value.videoFileId = null } return { videoState, - identifyState, + identifyResult, + hasVideo, + isIdentified, faceDuration, handleFileUpload, handleVideoSelect, performFaceRecognition, resetVideoState, - resetIdentifyState, + resetIdentifyResult, getVideoPreviewUrl, } } diff --git a/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts b/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts index 64120f1aea..245b0c1d97 100644 --- a/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts +++ b/frontend/app/web-gold/src/views/kling/hooks/useIdentifyFaceController.ts @@ -1,7 +1,11 @@ /** - * @fileoverview useIdentifyFaceController Hook - 主控制器 + * @fileoverview useIdentifyFaceController Hook - 主控制器(重构版) * - * 职责:协调语音、视频、Pipeline 各个子模块,提供统一的外部接口 + * 设计理念: + * - 所有操作统一通过 Pipeline 状态机 + * - 移除独立的 identifyState,使用 pipeline 状态 + * - 点击"生成配音" → 运行到 ready 状态 + * - 点击"生成数字人视频" → 从 ready 继续 → completed * * 模块依赖关系: * ┌─────────────────────────────────────────────────┐ @@ -9,7 +13,7 @@ * │ ┌──────────────┐ ┌──────────────┐ ┌───────────┐│ * │ │ Voice │ │ Digital │ │ Pipeline ││ * │ │ Generation │ │ Human │ │ ││ - * │ │ │ │ Generation │ │ ││ + * │ │ │ │ Generation │ │ 状态机 ││ * │ └──────────────┘ └──────────────┘ └───────────┘│ * └─────────────────────────────────────────────────┘ */ @@ -17,7 +21,6 @@ import { computed } from 'vue' import { message } from 'ant-design-vue' import type { - UseIdentifyFaceController, VoiceMeta, } from '../types/identify-face' import { useVoiceGeneration } from './useVoiceGeneration' @@ -31,36 +34,35 @@ const MAX_TEXT_LENGTH = 4000 /** * 主控制器 Hook */ -export function useIdentifyFaceController(): UseIdentifyFaceController { +export function useIdentifyFaceController() { // 子 Hooks const voice = useVoiceGeneration() const digitalHuman = useDigitalHumanGeneration() - // Pipeline 流程配置(使用新的极简状态机) + // Pipeline 流程配置 const pipeline = useSimplePipeline({ uploadVideo: async (_file: File) => { // 上传已经在 handleFileUpload 中处理 - // 这里直接返回 fileId - return digitalHuman.identifyState.value.videoFileId || '' + return digitalHuman.identifyResult.value.videoFileId || '' }, recognizeFromLibrary: async (video: any) => { await digitalHuman.handleVideoSelect(video) - await digitalHuman.performFaceRecognition() + const result = await digitalHuman.performFaceRecognition() return { - sessionId: digitalHuman.identifyState.value.sessionId, - faceId: digitalHuman.identifyState.value.faceId, - startTime: digitalHuman.identifyState.value.faceStartTime, - endTime: digitalHuman.identifyState.value.faceEndTime, + sessionId: result.sessionId, + faceId: result.faceId, + startTime: result.faceStartTime, + endTime: result.faceEndTime, duration: digitalHuman.faceDuration.value, } }, recognizeUploaded: async (_fileId: string | number) => { - await digitalHuman.performFaceRecognition() + const result = await digitalHuman.performFaceRecognition() return { - sessionId: digitalHuman.identifyState.value.sessionId, - faceId: digitalHuman.identifyState.value.faceId, - startTime: digitalHuman.identifyState.value.faceStartTime, - endTime: digitalHuman.identifyState.value.faceEndTime, + sessionId: result.sessionId, + faceId: result.faceId, + startTime: result.faceStartTime, + endTime: result.faceEndTime, duration: digitalHuman.faceDuration.value, } }, @@ -93,30 +95,24 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { const hasVideo = digitalHuman.videoState.value.uploadedVideo || digitalHuman.videoState.value.selectedVideo const hasBasicConfig = hasText && hasVoice && hasVideo - // 未识别时只需要基础配置 - if (!digitalHuman.identifyState.value.identified) return !!hasBasicConfig + // 未识别或未到 ready 状态需要基础配置 + if (!pipeline.isReady.value) return !!hasBasicConfig - // 已识别后需要音频生成并通过校验 - return !!( - hasBasicConfig && - voice.audioState.value.generated && - validationPassed.value - ) + // 已到 ready 状态可以生成 + return true }) /** 最大文本长度(根据人脸时长动态计算) */ const maxTextLength = computed(() => { - const isIdentified = digitalHuman.identifyState.value.identified const faceDuration = digitalHuman.faceDuration.value - if (!isIdentified || faceDuration <= 0) return MAX_TEXT_LENGTH + if (faceDuration <= 0) return MAX_TEXT_LENGTH return Math.min(MAX_TEXT_LENGTH, Math.floor(voice.suggestedMaxChars.value * 1.2)) }) /** 文本框占位符提示 */ const textareaPlaceholder = computed(() => { - const isIdentified = digitalHuman.identifyState.value.identified const faceDuration = digitalHuman.faceDuration.value - if (isIdentified && faceDuration > 0) { + if (faceDuration > 0) { return `请输入文案,建议不超过${voice.suggestedMaxChars.value}字以确保与视频匹配` } return '请输入你想让角色说话的内容' @@ -131,12 +127,6 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { /** 音频时长显示(秒) */ const audioDurationSec = computed(() => (voice.audioState.value.durationMs / 1000).toFixed(1)) - /** 是否显示生成提示 */ - const showGenerateHint = computed(() => - digitalHuman.identifyState.value.identified && - (!voice.audioState.value.generated || !validationPassed.value) - ) - /** 音频播放 URL */ const audioUrl = computed(() => { const audio = voice.audioState.value.generated @@ -145,12 +135,12 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { }) /** - * 校验是否通过(计算属性) - * 规则:音频时长 <= 人脸时长(Kling 要求音频不能超过人脸区间) + * 校验是否通过 + * 规则:音频时长 <= 人脸时长 */ const validationPassed = computed(() => { - const faceDuration = Number(faceDurationSec.value) - const audioDuration = Number(audioDurationSec.value) + const faceDuration = digitalHuman.faceDuration.value + const audioDuration = voice.audioState.value.durationMs return audioDuration <= faceDuration }) @@ -162,12 +152,48 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { function resetAllStates(): void { voice.resetAudioState() digitalHuman.resetVideoState() - digitalHuman.resetIdentifyState() pipeline.reset() } /** - * 生成数字人视频 - 使用新的 Pipeline API + * 生成配音 - 运行 Pipeline 到 ready 状态 + */ + async function generateAudio(): Promise { + const hasVideo = digitalHuman.videoState.value.uploadedVideo || digitalHuman.videoState.value.selectedVideo + const hasText = voice.ttsText.value.trim() + const hasVoice = voice.selectedVoiceMeta.value + + if (!hasText) { + message.warning('请输入文案内容') + return + } + + if (!hasVoice) { + message.warning('请选择音色') + return + } + + if (!hasVideo) { + message.warning('请先选择视频') + return + } + + try { + // 运行流程到 ready 状态(包含识别、生成、校验) + await pipeline.run({ + videoFile: digitalHuman.videoState.value.videoFile, + selectedVideo: digitalHuman.videoState.value.selectedVideo, + text: voice.ttsText.value, + voice: voice.selectedVoiceMeta.value, + speechRate: voice.speechRate.value, + }) + } catch { + // 错误已在 Pipeline 中处理 + } + } + + /** + * 生成数字人视频 - 从 ready 状态继续到 completed */ async function generateDigitalHuman(): Promise { if (!canGenerate.value) { @@ -189,16 +215,18 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { } try { - // 运行流程到 ready 状态 - await pipeline.run({ - videoFile: digitalHuman.videoState.value.videoFile, - selectedVideo: digitalHuman.videoState.value.selectedVideo, - text, - voice: voiceMeta, - speechRate: voice.speechRate.value, - }) + // 如果还没到 ready 状态,先运行到 ready + if (!pipeline.isReady.value) { + await pipeline.run({ + videoFile: digitalHuman.videoState.value.videoFile, + selectedVideo: digitalHuman.videoState.value.selectedVideo, + text, + voice: voiceMeta, + speechRate: voice.speechRate.value, + }) + } - // 如果到达 ready 状态,自动创建任务 + // 如果到达 ready 状态,创建任务 if (pipeline.isReady.value) { await pipeline.createTask() // 任务提交成功后,重置所有状态 @@ -242,7 +270,7 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { function handleSelectUpload(): void { digitalHuman.videoState.value.videoSource = 'upload' digitalHuman.videoState.value.selectedVideo = null - digitalHuman.resetIdentifyState() + digitalHuman.resetIdentifyResult() pipeline.reset() } @@ -285,19 +313,6 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { // ==================== 返回接口 ==================== - /** - * 包装的音频生成方法(延迟识别) - * 在生成音频前先执行人脸识别 - */ - async function generateAudio(): Promise { - // 如果有视频但未识别,先执行识别 - const hasVideo = digitalHuman.videoState.value.uploadedVideo || digitalHuman.videoState.value.selectedVideo - if (hasVideo && !digitalHuman.identifyState.value.identified) { - await digitalHuman.performFaceRecognition() - } - await voice.generateAudio() - } - return { // 语音生成模块 ttsText: voice.ttsText, @@ -311,13 +326,13 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { // 数字人生成模块 videoState: digitalHuman.videoState, - identifyState: digitalHuman.identifyState, + identifyResult: digitalHuman.identifyResult, + isIdentified: digitalHuman.isIdentified, faceDuration: digitalHuman.faceDuration, - performFaceRecognition: digitalHuman.performFaceRecognition, handleFileUpload: digitalHuman.handleFileUpload, getVideoPreviewUrl: digitalHuman.getVideoPreviewUrl, resetVideoState: digitalHuman.resetVideoState, - resetIdentifyState: digitalHuman.resetIdentifyState, + resetIdentifyResult: digitalHuman.resetIdentifyResult, // 业务方法 generateDigitalHuman, @@ -345,11 +360,10 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { speechRateDisplay, faceDurationSec, audioDurationSec, - showGenerateHint, audioUrl, validationPassed, - // Pipeline 状态 + // Pipeline 状态(单一状态源) pipelineState: pipeline.state, pipelineStateLabel: pipeline.stateLabel, pipelineStateDescription: pipeline.stateDescription, @@ -360,8 +374,6 @@ export function useIdentifyFaceController(): UseIdentifyFaceController { pipelineProgress: pipeline.progress, pipelineCurrentStepIndex: pipeline.currentStepIndex, pipelineError: pipeline.error, - runPipeline: pipeline.run, - createPipelineTask: pipeline.createTask, retryPipeline: pipeline.retry, resetPipeline: pipeline.reset, } diff --git a/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts b/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts index 53663a8d60..51cdf3b67e 100644 --- a/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts +++ b/frontend/app/web-gold/src/views/kling/hooks/useVoiceGeneration.ts @@ -55,8 +55,8 @@ export function useVoiceGeneration(): UseVoiceGeneration { try { const params = { inputText: ttsText.value, - voiceConfigId: voice.rawId || extractIdFromString(voice.id), - speechRate: speechRate.value || DEFAULT_SPEECH_RATE, + voiceConfigId: voice.rawId ?? extractIdFromString(voice.id), + speechRate: speechRate.value, audioFormat: 'mp3' as const, providerType: DEFAULT_VOICE_PROVIDER, } @@ -85,48 +85,60 @@ export function useVoiceGeneration(): UseVoiceGeneration { } } + /** + * 解析音频时长(浏览器环境) + * 使用 HTML5 Audio API,添加安全边距避免精度误差 + */ async function parseAudioDuration(base64Data: string): Promise { const base64 = base64Data.includes(',') ? base64Data.split(',')[1] : base64Data const binaryString = window.atob(base64) const bytes = new Uint8Array(binaryString.length) - for (let i = 0; i < binaryString.length; i++) { + for (let i = 0; i < bytes.length; i++) { bytes[i] = binaryString.charCodeAt(i) } - return new Promise(function(resolve, reject) { + return new Promise(function(resolve, reject) { const blob = new Blob([bytes], { type: 'audio/mp3' }) const audio = new Audio() const objectUrl = URL.createObjectURL(blob) - // 超时机制:5秒后拒绝 const timeoutId = setTimeout(function() { - URL.revokeObjectURL(objectUrl) + cleanup() reject(new Error('音频时长解析超时')) - }, 5000) + }, 10000) - function onLoadedMetadata() { + function cleanup() { clearTimeout(timeoutId) URL.revokeObjectURL(objectUrl) + audio.removeEventListener('loadedmetadata', onLoadedMetadata) + audio.removeEventListener('error', onError) + audio.removeEventListener('canplay', onLoadedMetadata) + } + function onLoadedMetadata() { const duration = audio.duration - if (!isFinite(duration) || duration <= 0 || isNaN(duration)) { - reject(new Error(`音频时长无效: ${duration},请检查音频格式是否正确`)) + if (!isFinite(duration) || duration <= 0) { + cleanup() + reject(new Error(`音频时长无效: ${duration}`)) return } - const durationMs = Math.round(duration * 1000) - console.log('[parseAudioDuration] 音频时长解析成功:', durationMs, 'ms') + // 减去安全边距(200ms),避免因解析误差导致 sound_end_time 超过实际音频时长 + const durationMs = Math.floor(duration * 1000) - 200 + const rawDurationMs = Math.floor(duration * 1000) + console.log('[parseAudioDuration] 解析成功:', durationMs, 'ms (原始:', rawDurationMs, 'ms)') + cleanup() resolve(durationMs) } function onError() { - clearTimeout(timeoutId) - URL.revokeObjectURL(objectUrl) + cleanup() reject(new Error('音频解析失败,请检查音频格式')) } audio.addEventListener('loadedmetadata', onLoadedMetadata) audio.addEventListener('error', onError) + audio.addEventListener('canplay', onLoadedMetadata, { once: true }) audio.src = objectUrl audio.load() }) diff --git a/frontend/app/web-gold/src/views/kling/types/identify-face.ts b/frontend/app/web-gold/src/views/kling/types/identify-face.ts index 78c1f7cd46..c4fbcb8078 100644 --- a/frontend/app/web-gold/src/views/kling/types/identify-face.ts +++ b/frontend/app/web-gold/src/views/kling/types/identify-face.ts @@ -44,6 +44,17 @@ export interface IdentifyState { videoFileId: string | number | null } +/** + * 人脸识别结果接口(不包含状态标志) + */ +export interface IdentifyResult { + sessionId: string + faceId: string + faceStartTime: number + faceEndTime: number + videoFileId: string | number | null +} + /** * 音频状态接口 */ diff --git a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java index 2a57d8f86a..028637bb35 100644 --- a/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java +++ b/yudao-module-tik/src/main/java/cn/iocoder/yudao/module/tik/voice/service/LatentsyncPollingService.java @@ -365,86 +365,14 @@ public class LatentsyncPollingService { } /** - * 保存视频到OSS - 直接保存到 infra_file 避免重复 - * 返回保存结果,包含URL、文件大小和文件ID + * 保存远程视频URL + * 简化版:直接保存Kling返回的URL,不再下载上传到OSS */ private OssSaveResult saveVideoToOss(TikDigitalHumanTaskDO task, String remoteVideoUrl) throws Exception { - log.info("[saveVideoToOss][任务({})开始下载并保存视频到OSS][remoteUrl={}]", task.getId(), remoteVideoUrl); + log.info("[saveVideoToOss][任务({})直接保存Kling URL][url={}]", task.getId(), remoteVideoUrl); - try { - // 1. 下载远程视频文件 - byte[] videoBytes = downloadRemoteFile(remoteVideoUrl); - - // 2. 内存检查:超过50MB记录警告 - int sizeMB = videoBytes.length / 1024 / 1024; - if (sizeMB > 50) { - log.warn("[saveVideoToOss][任务({})视频文件较大][size={}MB]", task.getId(), sizeMB); - } - - // 3. 获取OSS目录和文件名 - Long userId = task.getUserId(); - String baseDirectory = ossInitService.getOssDirectoryByCategory(userId, "generate"); - String fileName = String.format("数字人视频_%d_%d.mp4", task.getId(), System.currentTimeMillis()); - - // 4. 获取FileClient并上传到OSS - FileClient client = fileConfigService.getMasterFileClient(); - if (client == null) { - throw new Exception("获取FileClient失败"); - } - - // 5. 生成上传路径(包含日期前缀和时间戳后缀) - String filePath = generateUploadPath(fileName, baseDirectory); - - // 6. 上传到OSS - String presignedUrl = client.upload(videoBytes, filePath, "video/mp4"); - - // 7. 移除预签名参数,获取基础URL - String cleanUrl = HttpUtils.removeUrlQuery(presignedUrl); - - // 8. 保存到 infra_file 表 - FileDO infraFile = new FileDO() - .setConfigId(client.getId()) - .setName(fileName) - .setPath(filePath) - .setUrl(cleanUrl) - .setType("video/mp4") - .setSize(videoBytes.length); - fileMapper.insert(infraFile); - Long infraFileId = infraFile.getId(); - - log.info("[saveVideoToOss][任务({})视频保存完成][infraFileId={}, size={}MB]", - task.getId(), infraFileId, sizeMB); - return new OssSaveResult(cleanUrl, videoBytes.length, filePath, infraFileId); - - } catch (Exception e) { - log.error("[saveVideoToOss][任务({})保存视频失败][remoteUrl={}]", task.getId(), remoteVideoUrl, e); - return new OssSaveResult(remoteVideoUrl, 0, null, null); - } - } - - /** - * 生成上传路径(与 FileService 保持一致) - */ - private String generateUploadPath(String name, String directory) { - String prefix = cn.hutool.core.date.LocalDateTimeUtil.format( - cn.hutool.core.date.LocalDateTimeUtil.now(), - cn.hutool.core.date.DatePattern.PURE_DATE_PATTERN); - String suffix = String.valueOf(System.currentTimeMillis()); - - String ext = cn.hutool.core.io.FileUtil.extName(name); - if (StrUtil.isNotEmpty(ext)) { - name = cn.hutool.core.io.FileUtil.mainName(name) + "_" + suffix + "." + ext; - } else { - name = name + "_" + suffix; - } - - if (StrUtil.isNotEmpty(prefix)) { - name = prefix + "/" + name; - } - if (StrUtil.isNotEmpty(directory)) { - name = directory + "/" + name; - } - return name; + // 直接返回Kling URL,不上传到OSS + return new OssSaveResult(remoteVideoUrl, 0, null, null); } /** @@ -480,57 +408,39 @@ public class LatentsyncPollingService { } } - /** - * 下载远程文件 - 内存优化 - */ - private byte[] downloadRemoteFile(String remoteUrl) throws Exception { - log.info("[downloadRemoteFile][下载文件][url={}]", remoteUrl); - - try (HttpResponse response = HttpRequest.get(remoteUrl) - .execute()) { - - if (!response.isOk()) { - throw new Exception("下载文件失败: HTTP " + response.getStatus()); - } - - // 流式读取:分块处理避免大文件OOM - byte[] bytes = response.bodyBytes(); - int sizeMB = bytes.length / 1024 / 1024; - log.info("[downloadRemoteFile][文件下载完成][size={} bytes, {}MB]", bytes.length, sizeMB); - return bytes; - } - } - /** * 保存结果视频到用户文件表 + * 如果 OSS 保存失败(infraFileId 为 null),直接保存外部 URL */ private void saveResultVideoToUserFiles(TikDigitalHumanTaskDO task, OssSaveResult saveResult) { try { Long userId = task.getUserId(); - Long infraFileId = saveResult.getInfraFileId(); - // 验证必要参数 - if (userId == null || infraFileId == null) { - log.warn("[saveResultVideoToUserFiles][任务({})参数不完整,无法保存][userId={}, infraFileId={}]", - task.getId(), userId, infraFileId); + if (userId == null) { + log.warn("[saveResultVideoToUserFiles][任务({})userId为空,无法保存]", task.getId()); return; } - // 创建用户文件记录 + // 创建用户文件记录(支持外部 URL,fileId 可为空) TikUserFileDO userFile = new TikUserFileDO(); userFile.setUserId(userId); - userFile.setFileId(infraFileId); - userFile.setFileName(String.format("数字人视频_%d_%d.mp4", task.getId(), System.currentTimeMillis())); + userFile.setFileId(saveResult.getInfraFileId()); // OSS保存失败时为null,表示外部URL + userFile.setFileName(String.format("数字人视频_%d.mp4", task.getId())); userFile.setFileType("video/mp4"); userFile.setFileCategory("generate"); userFile.setFileUrl(saveResult.getUrl()); userFile.setFilePath(saveResult.getFilePath()); - userFile.setFileSize((long) saveResult.getFileSize()); + userFile.setFileSize(saveResult.getInfraFileId() != null ? (long) saveResult.getFileSize() : null); userFileMapper.insert(userFile); - log.info("[saveResultVideoToUserFiles][任务({})文件记录已保存][userFileId={}, infraFileId={}]", - task.getId(), userFile.getId(), infraFileId); + if (saveResult.getInfraFileId() != null) { + log.info("[saveResultVideoToUserFiles][任务({})已保存到OSS][userFileId={}, infraFileId={}]", + task.getId(), userFile.getId(), saveResult.getInfraFileId()); + } else { + log.info("[saveResultVideoToUserFiles][任务({})已保存外部URL][userFileId={}, url={}]", + task.getId(), userFile.getId(), saveResult.getUrl()); + } } catch (Exception e) { log.error("[saveResultVideoToUserFiles][任务({})保存失败]", task.getId(), e); }