feat: 功能优化

2025-12-02 01:55:57 +08:00
parent 900b47f585
commit 0fffd787bb
32 changed files with 974 additions and 2156 deletions
--- a/frontend/app/web-gold/src/api/kling.js
+++ b/frontend/app/web-gold/src/api/kling.js
@@ -21,7 +21,7 @@ export function identifyFace(data) {
 */
 export function createLipSyncTask(data) {
  return request({
-    url: '/webApi/api/tik/kling/lip-sync/create',
+    url: '/webApi/api/tik/kling/task/create',
    method: 'post',
    data
  })
@@ -98,7 +98,10 @@ export async function createKlingTaskAndIdentify(file) {
        fileId,
        videoUrl,
        sessionId: identifyRes.data.sessionId,
-        faceId: identifyRes.data.data.face_data[0].face_id || null
+        faceId: identifyRes.data.data.face_data[0].face_id || null,
+        // 人脸时间信息，用于音频插入时间
+        startTime: identifyRes.data.data.face_data[0].start_time || 0,
+        endTime: identifyRes.data.data.face_data[0].end_time || 0
      }
    }
  } catch (error) {
--- a/frontend/app/web-gold/src/components/SidebarNav.vue
+++ b/frontend/app/web-gold/src/components/SidebarNav.vue
@@ -37,7 +37,7 @@ const items = computed(() => {
      children: [
        { path: '/digital-human/voice-copy', label: '人声克隆', icon: 'mic' },
        { path: "/digital-human/kling", label: "可灵数字人", icon: "user" },
-        { path: '/digital-human/video', label: '数字人视频', icon: 'video' },
+        // { path: '/digital-human/video', label: '数字人视频', icon: 'video' },
      ]
    },
    {
--- a/frontend/app/web-gold/src/utils/excel.js
+++ b/frontend/app/web-gold/src/utils/excel.js
@@ -61,8 +61,7 @@ export function exportBenchmarkDataToExcel(data, options = {}) {
      { key: '链接', width: 60, align: 'left' },
      { key: '封面链接', width: 60, align: 'left' },
      { key: '音频链接', width: 60, align: 'left' },
-      { key: '原配音', width: 80, align: 'left', wrap: true },
-      { key: '风格提示词', width: 80, align: 'left', wrap: true }
+      { key: '原配音', width: 80, align: 'left', wrap: true }
    ]

    // 准备导出数据
@@ -82,8 +81,7 @@ export function exportBenchmarkDataToExcel(data, options = {}) {
        '链接': item.share_url || '',
        '封面链接': item.cover || '',
        '音频链接': item.audio_url || '',
-        '原配音': item.transcriptions || '',
-        '风格提示词': item.prompt || ''
+        '原配音': item.transcriptions || ''
      }
      
      // 小红书平台特殊处理
--- a/frontend/app/web-gold/src/views/kling/IdentifyFace.vue
+++ b/frontend/app/web-gold/src/views/kling/IdentifyFace.vue
@@ -13,10 +13,16 @@
          <h3>文案</h3>
          <a-textarea
            v-model:value="ttsText"
-            placeholder="请输入你想让角色说话的内容"
+            :placeholder="textareaPlaceholder"
            :rows="4"
+            :maxlength="maxTextLength"
+            :show-count="true"
            class="tts-textarea"
          />
+          <div v-if="identified && faceDuration > 0" class="text-hint">
+            <span class="hint-icon">💡</span>
+            <span>视频中人脸出现时长约 {{ (faceDuration / 1000).toFixed(1) }} 秒，建议文案不超过 {{ suggestedMaxChars }} 字</span>
+          </div>
        </div>

        <!-- 音色选择 -->
@@ -139,6 +145,74 @@
          </div>
        </div>

+        <!-- 配音生成与校验（仅在识别后显示） -->
+        <div v-if="identified" class="section audio-generation-section">
+          <h3>配音生成与校验</h3>
+
+          <!-- 生成配音按钮 -->
+          <div class="generate-audio-row">
+            <a-button
+              type="default"
+              size="large"
+              :disabled="!canGenerateAudio"
+              :loading="generatingAudio"
+              block
+              @click="handleGenerateAudio"
+            >
+              {{ generatingAudio ? '生成中...' : '生成配音（用于校验时长）' }}
+            </a-button>
+          </div>
+
+          <!-- 音频预览（生成后显示） -->
+          <div v-if="generatedAudio" class="audio-preview">
+            <div class="audio-info">
+              <h4>生成的配音</h4>
+              <div class="duration-info">
+                <span class="label">音频时长：</span>
+                <span class="value">{{ (audioDurationMs / 1000).toFixed(1) }} 秒</span>
+              </div>
+              <div class="duration-info">
+                <span class="label">人脸区间：</span>
+                <span class="value">{{ (faceDuration / 1000).toFixed(1) }} 秒</span>
+              </div>
+              <div class="duration-info" :class="{ 'validation-passed': audioValidationPassed, 'validation-failed': !audioValidationPassed }">
+                <span class="label">校验结果：</span>
+                <span class="value">
+                  {{ audioValidationPassed ? '✅ 通过' : '❌ 不通过（需至少2秒重合）' }}
+                </span>
+              </div>
+            </div>
+
+            <!-- 音频播放器 -->
+            <div class="audio-player">
+              <audio
+                v-if="generatedAudio.audioBase64"
+                :src="`data:audio/mp3;base64,${generatedAudio.audioBase64}`"
+                controls
+                class="audio-element"
+              />
+              <audio
+                v-else-if="generatedAudio.audioUrl"
+                :src="generatedAudio.audioUrl"
+                controls
+                class="audio-element"
+              />
+            </div>
+
+            <!-- 重新生成按钮 -->
+            <div class="regenerate-row">
+              <a-button
+                type="link"
+                size="small"
+                @click="handleGenerateAudio"
+                :loading="generatingAudio"
+              >
+                重新生成
+              </a-button>
+            </div>
+          </div>
+        </div>
+
        <!-- 按钮组 -->
        <div class="action-buttons">
          <a-button
@@ -162,6 +236,12 @@
          >
            {{ isGenerating ? '生成中...' : '生成数字人视频' }}
          </a-button>
+
+          <!-- Hint shown while video generation is blocked by the audio duration check -->
+          <div v-if="identified && !audioValidationPassed" class="generate-hint">
+            <span class="hint-icon">⚠️</span>
+            <span>请先生成配音并通过时长校验</span>
+          </div>
        </div>
      </div>

@@ -191,6 +271,7 @@ import { ref, computed, onMounted } from 'vue'
 import { message } from 'ant-design-vue'
 import { SoundOutlined, LoadingOutlined } from '@ant-design/icons-vue'
 import { createKlingTaskAndIdentify ,createLipSyncTask , getLipSyncTask} from '@/api/kling'
+import { getDigitalHumanTask } from '@/api/digitalHuman'
 import { MaterialService } from '@/api/material'
 import { VoiceService } from '@/api/voice'
 import { useVoiceCopyStore } from '@/stores/voiceCopy'
@@ -213,8 +294,44 @@ const currentTaskError = ref('') // 任务错误信息
 // 识别结果存储
 const identifySessionId = ref('') // 人脸识别会话ID
 const identifyFaceId = ref('') // 选中的人脸ID
+const identifyFaceStartTime = ref(0) // 人脸可对口型区间起点时间（ms）
+const identifyFaceEndTime = ref(0) // 人脸可对口型区间终点时间（ms）
 const identifyVideoFileId = ref(null) // 视频文件ID

+// Pre-generated dubbing audio state
+const generatedAudio = ref(null) // audio data returned by the synthesize call
+const audioDurationMs = ref(0) // audio duration in milliseconds
+const audioValidationPassed = ref(false) // whether the duration check passed
+const generatingAudio = ref(false) // true while dubbing audio is being generated
+
+// Length of the face interval in milliseconds (end time minus start time)
+const faceDuration = computed(() => identifyFaceEndTime.value - identifyFaceStartTime.value)
+
+// 基于人脸时长计算建议的最大文案字数（中文约3.5字/秒）
+const suggestedMaxChars = computed(() => {
+  const durationSec = faceDuration.value / 1000
+  // 语速影响：语速越高，单位时间可说更多字
+  const adjustedRate = speechRate.value || 1.0
+  return Math.floor(durationSec * 3.5 * adjustedRate)
+})
+
+// 最大文案长度限制（略大于建议值，留有余地）
+const maxTextLength = computed(() => {
+  if (!identified.value || faceDuration.value <= 0) {
+    return 4000 // 未识别时使用默认限制
+  }
+  // 最大字数 = 建议字数 * 1.2，但不超过4000
+  return Math.min(4000, Math.floor(suggestedMaxChars.value * 1.2))
+})
+
+// 文案输入框提示文字
+const textareaPlaceholder = computed(() => {
+  if (identified.value && faceDuration.value > 0) {
+    return `请输入文案，建议不超过${suggestedMaxChars.value}字以确保与视频匹配`
+  }
+  return '请输入你想让角色说话的内容'
+})
+
 // 音频试听缓存
 const previewAudioCache = new Map()
 const MAX_PREVIEW_CACHE_SIZE = 50
@@ -259,8 +376,17 @@ const canGenerate = computed(() => {
  const hasText = ttsText.value.trim()
  const hasVoice = selectedVoiceMeta.value
  const hasVideo = uploadedVideo.value
+  const isIdentified = identified.value // 必须先识别
  const notGenerating = !isGenerating.value
-  return !!(hasText && hasVoice && hasVideo && notGenerating)
+  const audioValidated = audioValidationPassed.value // 必须通过音频时长校验
+  return !!(hasText && hasVoice && hasVideo && isIdentified && notGenerating && audioValidated)
+})
+
+// 新增：生成配音的条件（不需要通过校验，只需要基本的文案和音色）
+const canGenerateAudio = computed(() => {
+  const hasText = ttsText.value.trim()
+  const hasVoice = selectedVoiceMeta.value
+  return !!(hasText && hasVoice && !generatingAudio.value)
 })

 // UI 控制
@@ -434,12 +560,17 @@ const handleIdentify = async () => {
    // 保存识别结果
    identifySessionId.value = res.data.sessionId
    identifyVideoFileId.value = res.data.fileId
-    identifyVideoFileId.value = res.data.faceId
+    identifyFaceId.value = res.data.faceId
+    // 保存人脸时间信息，用于音频插入时间
+    identifyFaceStartTime.value = res.data.startTime || 0
+    identifyFaceEndTime.value = res.data.endTime || 0
    identified.value = true
-    message.success('识别完成！')
-    console.log( '识别结果:', res.data)
-    // 识别成功后，延迟1.5秒自动生成数字人视频
-    await handleGenerate()
+
+    // 显示识别成功提示，包含人脸区间信息
+    const durationSec = (identifyFaceEndTime.value - identifyFaceStartTime.value) / 1000
+    message.success(`识别完成！人脸出现时长约 ${durationSec.toFixed(1)} 秒，建议文案不超过 ${suggestedMaxChars.value} 字`)
+    console.log('识别结果:', res.data)
+    // 不再自动触发生成，让用户先调整文案
  } catch (error) {
    message.error(error.message || '识别失败')
  } finally {
@@ -447,6 +578,147 @@ const handleIdentify = async () => {
  }
 }

+// 预生成配音（用于时长校验）
+const handleGenerateAudio = async () => {
+  if (!ttsText.value.trim()) {
+    message.warning('请输入文案内容')
+    return
+  }
+
+  const voice = selectedVoiceMeta.value
+  if (!voice) {
+    message.warning('请选择音色')
+    return
+  }
+
+  generatingAudio.value = true
+  try {
+    const params = {
+      inputText: ttsText.value,
+      voiceConfigId: voice.rawId || extractIdFromString(voice.id),
+      speechRate: speechRate.value || 1.0,
+      audioFormat: 'mp3'
+    }
+
+    const res = await VoiceService.synthesize(params)
+    if (res.code === 0) {
+      generatedAudio.value = res.data
+
+      // ✅ 严格依赖前端解析的真实时长（TTS API的durationMs不可靠）
+      if (!res.data.audioBase64) {
+        throw new Error('未收到音频数据，无法进行时长解析')
+      }
+
+      try {
+        audioDurationMs.value = await parseAudioDuration(res.data.audioBase64)
+        // 自动校验时长
+        validateAudioDuration()
+        message.success('配音生成成功！')
+      } catch (error) {
+        // 解析失败则终止流程，要求用户重新生成
+        console.error('❌ 音频解析失败:', error)
+        message.error('音频解析失败，请重新生成配音')
+        audioDurationMs.value = 0
+        generatedAudio.value = null
+        audioValidationPassed.value = false
+      }
+    } else {
+      throw new Error(res.msg || '配音生成失败')
+    }
+  } catch (error) {
+    console.error('generateAudio error:', error)
+    message.error(error.message || '配音生成失败')
+  } finally {
+    generatingAudio.value = false
+  }
+}
+
+/**
+ * 解析音频Base64数据并获取实际时长
+ * @param {string} base64Data - Base64音频数据（可包含 data:audio/...;base64, 前缀）
+ * @returns {Promise<number>} 音频时长（毫秒）
+ */
+const parseAudioDuration = (base64Data) => {
+  return new Promise((resolve, reject) => {
+    try {
+      // 提取纯Base64数据（移除data:...;base64,前缀）
+      const base64 = base64Data.includes(',') ? base64Data.split(',')[1] : base64Data
+
+      // Base64转二进制数据
+      const binaryString = window.atob(base64)
+      const bytes = new Uint8Array(binaryString.length)
+      for (let i = 0; i < binaryString.length; i++) {
+        bytes[i] = binaryString.charCodeAt(i)
+      }
+
+      // 创建Blob对象
+      const blob = new Blob([bytes], { type: 'audio/mp3' })
+
+      // 创建音频对象并解析时长
+      const audio = new Audio()
+      const objectUrl = URL.createObjectURL(blob)
+
+      audio.addEventListener('loadedmetadata', () => {
+        URL.revokeObjectURL(objectUrl)
+        const durationMs = Math.round(audio.duration * 1000) // 转换为毫秒
+        console.log('✅ 音频解析完成：', {
+          duration: audio.duration + '秒',
+          durationMs: durationMs + '毫秒'
+        })
+        resolve(durationMs)
+      })
+
+      audio.addEventListener('error', (error) => {
+        URL.revokeObjectURL(objectUrl)
+        console.warn('⚠️ 音频解析失败，使用API返回的时长')
+        reject(error)
+      })
+
+      // 设置音频源并加载
+      audio.src = objectUrl
+      audio.load()
+    } catch (error) {
+      console.error('❌ 音频解析异常:', error)
+      reject(error)
+    }
+  })
+}
+
+// 校验音频时长与人脸时长的匹配性
+// 要求：音频与人脸区间至少重合2秒
+const validateAudioDuration = () => {
+  if (!identified.value || faceDuration.value <= 0) {
+    audioValidationPassed.value = false
+    return false
+  }
+
+  const faceStart = identifyFaceStartTime.value
+  const faceEnd = identifyFaceEndTime.value
+  const faceDurationMs = faceEnd - faceStart
+  const audioDuration = audioDurationMs.value
+
+  // 计算重合区间（简化：假设音频从人脸起点开始插入）
+  const overlapStart = faceStart
+  const overlapEnd = Math.min(faceEnd, faceStart + audioDuration)
+  const overlapDuration = Math.max(0, overlapEnd - overlapStart)
+
+  // 校验：重合区间至少2秒
+  const isValid = overlapDuration >= 2000
+
+  audioValidationPassed.value = isValid
+
+  if (!isValid) {
+    const overlapSec = (overlapDuration / 1000).toFixed(1)
+    message.warning(
+      `音频时长（${(audioDuration/1000).toFixed(1)}秒）与人脸区间（${(faceDurationMs/1000).toFixed(1)}秒）不匹配，重合部分仅${overlapSec}秒，至少需要2秒`
+    )
+  } else {
+    message.success('时长校验通过！')
+  }
+
+  return isValid
+}
+
 // 生成数字人视频
 const handleGenerate = async () => {
  if (!canGenerate.value) {
@@ -454,6 +726,13 @@ const handleGenerate = async () => {
    return
  }

+  // 检查文案内容
+  const text = ttsText.value.trim()
+  if (!text) {
+    message.warning('请输入文案内容')
+    return
+  }
+
  const voice = selectedVoiceMeta.value
  if (!voice) {
    message.warning('请选择音色')
@@ -474,9 +753,12 @@ const handleGenerate = async () => {
      volume: 0,
      guidanceScale: 1,
      seed: 8888,
-      klingSessionId: identifySessionId.value,
-      klingFaceId: identifyFaceId.value,
-      aiProvider: 'kling'
+      kling_session_id: identifySessionId.value,
+      kling_face_id: identifyFaceId.value,
+      // 人脸可对口型时间区间，用于音频插入时间
+      kling_face_start_time: identifyFaceStartTime.value,
+      kling_face_end_time: identifyFaceEndTime.value,
+      ai_provider: 'kling'
    }

    const configId = voice.rawId || extractIdFromString(voice.id)
@@ -486,6 +768,25 @@ const handleGenerate = async () => {
    }
    taskData.voiceConfigId = configId

+    // ✅ 新增：传递预生成的音频给后端，复用而不重复TTS
+    if (generatedAudio.value && audioDurationMs.value > 0) {
+      taskData.pre_generated_audio = {
+        audioBase64: generatedAudio.value.audioBase64,
+        format: generatedAudio.value.format || 'mp3'
+      }
+
+      // ✅ 新增：传递 sound_end_time 给可灵API（音频结束时间）
+      // 可灵API要求：音频从0开始，所以结束时间 = 0 + 音频时长
+      taskData.sound_end_time = audioDurationMs.value
+
+      console.log('传递预生成音频给后端：', {
+        soundEndTime: taskData.sound_end_time,
+        hasAudioData: !!generatedAudio.value.audioBase64
+      })
+    } else {
+      console.warn('⚠️ 未找到预生成音频，将在后端重新TTS')
+    }
+
    message.loading('正在创建任务...', 0)
    const res = await createLipSyncTask(taskData)
    message.destroy()
@@ -710,6 +1011,23 @@ let previewObjectUrl = ''
  color: #fff;
 }

+.text-hint {
+  display: flex;
+  align-items: center;
+  gap: 6px;
+  margin-top: 8px;
+  padding: 8px 12px;
+  background: rgba(59, 130, 246, 0.1);
+  border: 1px solid rgba(59, 130, 246, 0.2);
+  border-radius: 6px;
+  font-size: 13px;
+  color: #94a3b8;
+}
+
+.hint-icon {
+  font-size: 14px;
+}
+
 .voice-source-toggle {
  display: inline-flex;
  border: 1px solid rgba(59, 130, 246, 0.2);
@@ -1088,4 +1406,83 @@ let previewObjectUrl = ''
    grid-template-columns: 1fr;
  }
 }
+
+/* 配音生成与校验样式 */
+.audio-generation-section {
+  margin-bottom: 24px;
+  padding: 16px;
+  background: rgba(255, 255, 255, 0.03);
+  border-radius: 12px;
+  border: 1px solid rgba(59, 130, 246, 0.15);
+}
+
+.generate-audio-row {
+  margin-bottom: 16px;
+}
+
+.audio-preview {
+  padding: 16px;
+  background: rgba(0, 0, 0, 0.2);
+  border-radius: 8px;
+}
+
+.audio-info h4 {
+  color: #fff;
+  margin-bottom: 12px;
+  font-size: 14px;
+}
+
+.duration-info {
+  display: flex;
+  justify-content: space-between;
+  margin-bottom: 8px;
+  font-size: 13px;
+}
+
+.duration-info .label {
+  color: var(--color-text-secondary);
+}
+
+.duration-info .value {
+  color: #fff;
+  font-weight: 600;
+}
+
+.duration-info.validation-passed .value {
+  color: #52c41a;
+}
+
+.duration-info.validation-failed .value {
+  color: #ff4d4f;
+}
+
+.audio-player {
+  margin: 16px 0;
+}
+
+.audio-element {
+  width: 100%;
+}
+
+.regenerate-row {
+  text-align: center;
+  margin-top: 12px;
+}
+
+.generate-hint {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  margin-top: 8px;
+  padding: 8px 12px;
+  background: rgba(255, 193, 7, 0.1);
+  border: 1px solid rgba(255, 193, 7, 0.3);
+  border-radius: 6px;
+  font-size: 13px;
+  color: #faad14;
+}
+
+.hint-icon {
+  font-size: 14px;
+}
 </style>