feat: 功能优化

This commit is contained in:
2025-12-02 01:55:57 +08:00
parent 900b47f585
commit 0fffd787bb
32 changed files with 974 additions and 2156 deletions

View File

@@ -13,10 +13,16 @@
<h3>文案</h3>
<a-textarea
v-model:value="ttsText"
placeholder="请输入你想让角色说话的内容"
:placeholder="textareaPlaceholder"
:rows="4"
:maxlength="maxTextLength"
:show-count="true"
class="tts-textarea"
/>
<div v-if="identified && faceDuration > 0" class="text-hint">
<span class="hint-icon">💡</span>
<span>视频中人脸出现时长约 {{ (faceDuration / 1000).toFixed(1) }} 建议文案不超过 {{ suggestedMaxChars }} </span>
</div>
</div>
<!-- 音色选择 -->
@@ -139,6 +145,74 @@
</div>
</div>
<!-- 配音生成与校验仅在识别后显示 -->
<div v-if="identified" class="section audio-generation-section">
<h3>配音生成与校验</h3>
<!-- 生成配音按钮 -->
<div class="generate-audio-row">
<a-button
type="default"
size="large"
:disabled="!canGenerateAudio"
:loading="generatingAudio"
block
@click="handleGenerateAudio"
>
{{ generatingAudio ? '生成中...' : '生成配音(用于校验时长)' }}
</a-button>
</div>
<!-- 音频预览生成后显示 -->
<div v-if="generatedAudio" class="audio-preview">
<div class="audio-info">
<h4>生成的配音</h4>
<div class="duration-info">
<span class="label">音频时长</span>
<span class="value">{{ (audioDurationMs / 1000).toFixed(1) }} </span>
</div>
<div class="duration-info">
<span class="label">人脸区间</span>
<span class="value">{{ (faceDuration / 1000).toFixed(1) }} </span>
</div>
<div class="duration-info" :class="{ 'validation-passed': audioValidationPassed, 'validation-failed': !audioValidationPassed }">
<span class="label">校验结果</span>
<span class="value">
{{ audioValidationPassed ? '✅ 通过' : '❌ 不通过需至少2秒重合' }}
</span>
</div>
</div>
<!-- 音频播放器 -->
<div class="audio-player">
<audio
v-if="generatedAudio.audioBase64"
:src="`data:audio/mp3;base64,${generatedAudio.audioBase64}`"
controls
class="audio-element"
/>
<audio
v-else-if="generatedAudio.audioUrl"
:src="generatedAudio.audioUrl"
controls
class="audio-element"
/>
</div>
<!-- 重新生成按钮 -->
<div class="regenerate-row">
<a-button
type="link"
size="small"
@click="handleGenerateAudio"
:loading="generatingAudio"
>
重新生成
</a-button>
</div>
</div>
</div>
<!-- 按钮组 -->
<div class="action-buttons">
<a-button
@@ -162,6 +236,12 @@
>
{{ isGenerating ? '生成中...' : '生成数字人视频' }}
</a-button>
<!-- 添加提示信息 -->
<div v-if="canGenerate && !audioValidationPassed" class="generate-hint">
<span class="hint-icon"></span>
<span>请先生成配音并通过时长校验</span>
</div>
</div>
</div>
@@ -191,6 +271,7 @@ import { ref, computed, onMounted } from 'vue'
import { message } from 'ant-design-vue'
import { SoundOutlined, LoadingOutlined } from '@ant-design/icons-vue'
import { createKlingTaskAndIdentify ,createLipSyncTask , getLipSyncTask} from '@/api/kling'
import { getDigitalHumanTask } from '@/api/digitalHuman'
import { MaterialService } from '@/api/material'
import { VoiceService } from '@/api/voice'
import { useVoiceCopyStore } from '@/stores/voiceCopy'
@@ -213,8 +294,44 @@ const currentTaskError = ref('') // 任务错误信息
// 识别结果存储
const identifySessionId = ref('') // 人脸识别会话ID
const identifyFaceId = ref('') // 选中的人脸ID
const identifyFaceStartTime = ref(0) // 人脸可对口型区间起点时间ms
const identifyFaceEndTime = ref(0) // 人脸可对口型区间终点时间ms
const identifyVideoFileId = ref(null) // 视频文件ID
// 配音预生成状态
const generatedAudio = ref(null) // 生成的音频数据
const audioDurationMs = ref(0) // 音频时长(毫秒)
const audioValidationPassed = ref(false) // 时长校验是否通过
const generatingAudio = ref(false) // 是否正在生成配音
// Duration (ms) of the lip-syncable face interval detected in the video.
const faceDuration = computed(() => {
  const start = identifyFaceStartTime.value
  const end = identifyFaceEndTime.value
  return end - start
})
// Suggested maximum script length for the detected face window.
// Mandarin speech averages ~3.5 characters per second; a higher speech
// rate lets proportionally more characters fit in the same interval.
const suggestedMaxChars = computed(() => {
  const CHARS_PER_SECOND = 3.5
  const seconds = faceDuration.value / 1000
  const rate = speechRate.value || 1.0
  return Math.floor(seconds * CHARS_PER_SECOND * rate)
})
// Hard character cap for the textarea. Before a face is identified we
// fall back to a generous default; afterwards we allow 20% headroom over
// the suggested count, never exceeding 4000.
const maxTextLength = computed(() => {
  const hasFaceWindow = identified.value && faceDuration.value > 0
  if (!hasFaceWindow) {
    return 4000
  }
  const withHeadroom = Math.floor(suggestedMaxChars.value * 1.2)
  return Math.min(4000, withHeadroom)
})
// Placeholder for the script textarea — once a face window is known it
// surfaces the suggested character budget to the user.
const textareaPlaceholder = computed(() => {
  const showSuggestion = identified.value && faceDuration.value > 0
  return showSuggestion
    ? `请输入文案,建议不超过${suggestedMaxChars.value}字以确保与视频匹配`
    : '请输入你想让角色说话的内容'
})
// 音频试听缓存
const previewAudioCache = new Map()
const MAX_PREVIEW_CACHE_SIZE = 50
@@ -259,8 +376,17 @@ const canGenerate = computed(() => {
const hasText = ttsText.value.trim()
const hasVoice = selectedVoiceMeta.value
const hasVideo = uploadedVideo.value
const isIdentified = identified.value // 必须先识别
const notGenerating = !isGenerating.value
return !!(hasText && hasVoice && hasVideo && notGenerating)
const audioValidated = audioValidationPassed.value // 必须通过音频时长校验
return !!(hasText && hasVoice && hasVideo && isIdentified && notGenerating && audioValidated)
})
// Preview-audio generation only needs script text and a selected voice
// (no duration validation), and must not already be in flight.
const canGenerateAudio = computed(() => {
  if (generatingAudio.value) {
    return false
  }
  const text = ttsText.value.trim()
  return Boolean(text && selectedVoiceMeta.value)
})
// UI 控制
@@ -434,12 +560,17 @@ const handleIdentify = async () => {
// 保存识别结果
identifySessionId.value = res.data.sessionId
identifyVideoFileId.value = res.data.fileId
identifyVideoFileId.value = res.data.faceId
identifyFaceId.value = res.data.faceId
// 保存人脸时间信息,用于音频插入时间
identifyFaceStartTime.value = res.data.startTime || 0
identifyFaceEndTime.value = res.data.endTime || 0
identified.value = true
message.success('识别完成!')
console.log( '识别结果:', res.data)
// 识别成功后延迟1.5秒自动生成数字人视频
await handleGenerate()
// 显示识别成功提示,包含人脸区间信息
const durationSec = (identifyFaceEndTime.value - identifyFaceStartTime.value) / 1000
message.success(`识别完成!人脸出现时长约 ${durationSec.toFixed(1)} 秒,建议文案不超过 ${suggestedMaxChars.value}`)
console.log('识别结果:', res.data)
// 不再自动触发生成,让用户先调整文案
} catch (error) {
message.error(error.message || '识别失败')
} finally {
@@ -447,6 +578,147 @@ const handleIdentify = async () => {
}
}
// Pre-generate the TTS audio so its real duration can be validated
// against the detected face interval before launching the (expensive)
// lip-sync task.
const handleGenerateAudio = async () => {
  if (!ttsText.value.trim()) {
    message.warning('请输入文案内容')
    return
  }
  const voice = selectedVoiceMeta.value
  if (!voice) {
    message.warning('请选择音色')
    return
  }
  generatingAudio.value = true
  try {
    const params = {
      inputText: ttsText.value,
      voiceConfigId: voice.rawId || extractIdFromString(voice.id),
      speechRate: speechRate.value || 1.0,
      audioFormat: 'mp3'
    }
    const res = await VoiceService.synthesize(params)
    if (res.code !== 0) {
      throw new Error(res.msg || '配音生成失败')
    }
    // Rely strictly on the client-side decoded duration — the durationMs
    // reported by the TTS API is not trustworthy.
    if (!res.data.audioBase64) {
      // Checked BEFORE publishing the result, so a payload without audio
      // can never be shown in the preview.
      throw new Error('未收到音频数据,无法进行时长解析')
    }
    generatedAudio.value = res.data
    try {
      audioDurationMs.value = await parseAudioDuration(res.data.audioBase64)
      // Immediately (re)run the overlap validation against the face window.
      validateAudioDuration()
      message.success('配音生成成功!')
    } catch (parseError) {
      // Decoding failed: abort and ask the user to regenerate.
      console.error('❌ 音频解析失败:', parseError)
      message.error('音频解析失败,请重新生成配音')
      resetGeneratedAudioState()
    }
  } catch (error) {
    console.error('generateAudio error:', error)
    message.error(error.message || '配音生成失败')
    // Clear any stale audio/validation state so the UI cannot keep
    // showing a previous (now mismatched) preview as if it were current.
    resetGeneratedAudioState()
  } finally {
    generatingAudio.value = false
  }
}

// Drop all pre-generated-audio state after a failed generation attempt.
const resetGeneratedAudioState = () => {
  generatedAudio.value = null
  audioDurationMs.value = 0
  audioValidationPassed.value = false
}
/**
 * Decode a Base64 audio payload and measure its real duration via the
 * browser's audio stack.
 *
 * @param {string} base64Data - Base64 audio data, with or without a
 *   `data:audio/...;base64,` prefix.
 * @returns {Promise<number>} duration in milliseconds.
 * @throws rejects when the payload cannot be decoded or the browser
 *   reports a non-finite duration — callers treat rejection as fatal.
 */
const parseAudioDuration = (base64Data) => {
  return new Promise((resolve, reject) => {
    try {
      // Strip any data-URL prefix so atob only sees raw Base64.
      const base64 = base64Data.includes(',') ? base64Data.split(',')[1] : base64Data
      // Base64 -> raw bytes.
      const binaryString = window.atob(base64)
      const bytes = new Uint8Array(binaryString.length)
      for (let i = 0; i < binaryString.length; i++) {
        bytes[i] = binaryString.charCodeAt(i)
      }
      const blob = new Blob([bytes], { type: 'audio/mp3' })
      // Let the media element parse the container and report duration.
      const audio = new Audio()
      const objectUrl = URL.createObjectURL(blob)
      audio.addEventListener('loadedmetadata', () => {
        URL.revokeObjectURL(objectUrl)
        // Some containers report Infinity/NaN until fully buffered —
        // treat that as a parse failure instead of resolving garbage
        // that would corrupt the downstream overlap validation.
        if (!Number.isFinite(audio.duration)) {
          reject(new Error('音频时长解析失败'))
          return
        }
        const durationMs = Math.round(audio.duration * 1000) // 转换为毫秒
        console.log('✅ 音频解析完成:', {
          duration: audio.duration + '秒',
          durationMs: durationMs + '毫秒'
        })
        resolve(durationMs)
      })
      audio.addEventListener('error', (error) => {
        URL.revokeObjectURL(objectUrl)
        // NOTE: there is no API-duration fallback — the caller treats
        // rejection as fatal and asks the user to regenerate, so the log
        // must not claim otherwise.
        console.warn('⚠️ 音频解析失败')
        reject(error)
      })
      // Set the source and kick off metadata loading.
      audio.src = objectUrl
      audio.load()
    } catch (error) {
      console.error('❌ 音频解析异常:', error)
      reject(error)
    }
  })
}
// Validate that the synthesized audio overlaps the detected face window
// by at least 2 seconds. Simplification: the audio is assumed to be
// inserted starting at the window's start time.
const validateAudioDuration = () => {
  const hasWindow = identified.value && faceDuration.value > 0
  if (!hasWindow) {
    audioValidationPassed.value = false
    return false
  }
  const faceStart = identifyFaceStartTime.value
  const faceEnd = identifyFaceEndTime.value
  const faceDurationMs = faceEnd - faceStart
  const audioDuration = audioDurationMs.value
  // Overlap = [faceStart, min(faceEnd, faceStart + audioLen)], clamped at 0.
  const overlapEnd = Math.min(faceEnd, faceStart + audioDuration)
  const overlapDuration = Math.max(0, overlapEnd - faceStart)
  const passed = overlapDuration >= 2000
  audioValidationPassed.value = passed
  if (passed) {
    message.success('时长校验通过!')
  } else {
    const overlapSec = (overlapDuration / 1000).toFixed(1)
    message.warning(
      `音频时长(${(audioDuration/1000).toFixed(1)}秒)与人脸区间(${(faceDurationMs/1000).toFixed(1)}秒)不匹配,重合部分仅${overlapSec}至少需要2秒`
    )
  }
  return passed
}
// 生成数字人视频
const handleGenerate = async () => {
if (!canGenerate.value) {
@@ -454,6 +726,13 @@ const handleGenerate = async () => {
return
}
// 检查文案内容
const text = ttsText.value.trim()
if (!text) {
message.warning('请输入文案内容')
return
}
const voice = selectedVoiceMeta.value
if (!voice) {
message.warning('请选择音色')
@@ -474,9 +753,12 @@ const handleGenerate = async () => {
volume: 0,
guidanceScale: 1,
seed: 8888,
klingSessionId: identifySessionId.value,
klingFaceId: identifyFaceId.value,
aiProvider: 'kling'
kling_session_id: identifySessionId.value,
kling_face_id: identifyFaceId.value,
// 人脸可对口型时间区间,用于音频插入时间
kling_face_start_time: identifyFaceStartTime.value,
kling_face_end_time: identifyFaceEndTime.value,
ai_provider: 'kling'
}
const configId = voice.rawId || extractIdFromString(voice.id)
@@ -486,6 +768,25 @@ const handleGenerate = async () => {
}
taskData.voiceConfigId = configId
// ✅ 新增传递预生成的音频给后端复用而不重复TTS
if (generatedAudio.value && audioDurationMs.value > 0) {
taskData.pre_generated_audio = {
audioBase64: generatedAudio.value.audioBase64,
format: generatedAudio.value.format || 'mp3'
}
// ✅ 新增:传递 sound_end_time 给可灵API音频结束时间
// 可灵API要求音频从0开始所以结束时间 = 0 + 音频时长
taskData.sound_end_time = audioDurationMs.value
console.log('传递预生成音频给后端:', {
soundEndTime: taskData.sound_end_time,
hasAudioData: !!generatedAudio.value.audioBase64
})
} else {
console.warn('⚠️ 未找到预生成音频将在后端重新TTS')
}
message.loading('正在创建任务...', 0)
const res = await createLipSyncTask(taskData)
message.destroy()
@@ -710,6 +1011,23 @@ let previewObjectUrl = ''
color: #fff;
}
/* Inline hint shown under the textarea once a face window is known,
   suggesting a character budget for the script. */
.text-hint {
display: flex;
align-items: center;
gap: 6px;
margin-top: 8px;
padding: 8px 12px;
background: rgba(59, 130, 246, 0.1);
border: 1px solid rgba(59, 130, 246, 0.2);
border-radius: 6px;
font-size: 13px;
color: #94a3b8;
}
/* Emoji icon inside hint rows. NOTE(review): `.hint-icon` is declared
   again later in this file with the same font-size — consider
   deduplicating. */
.hint-icon {
font-size: 14px;
}
.voice-source-toggle {
display: inline-flex;
border: 1px solid rgba(59, 130, 246, 0.2);
@@ -1088,4 +1406,83 @@ let previewObjectUrl = ''
grid-template-columns: 1fr;
}
}
/* Styles for the audio pre-generation & duration-validation section. */
.audio-generation-section {
margin-bottom: 24px;
padding: 16px;
background: rgba(255, 255, 255, 0.03);
border-radius: 12px;
border: 1px solid rgba(59, 130, 246, 0.15);
}
/* Row holding the "generate audio" button. */
.generate-audio-row {
margin-bottom: 16px;
}
/* Card shown once audio has been generated (durations + player). */
.audio-preview {
padding: 16px;
background: rgba(0, 0, 0, 0.2);
border-radius: 8px;
}
.audio-info h4 {
color: #fff;
margin-bottom: 12px;
font-size: 14px;
}
/* Label/value rows for audio duration, face window, validation result. */
.duration-info {
display: flex;
justify-content: space-between;
margin-bottom: 8px;
font-size: 13px;
}
.duration-info .label {
color: var(--color-text-secondary);
}
.duration-info .value {
color: #fff;
font-weight: 600;
}
/* Green/red value colouring driven by the validation outcome classes. */
.duration-info.validation-passed .value {
color: #52c41a;
}
.duration-info.validation-failed .value {
color: #ff4d4f;
}
.audio-player {
margin: 16px 0;
}
.audio-element {
width: 100%;
}
.regenerate-row {
text-align: center;
margin-top: 12px;
}
/* Amber hint shown when generation is otherwise possible but the audio
   has not yet passed duration validation. */
.generate-hint {
display: flex;
align-items: center;
gap: 8px;
margin-top: 8px;
padding: 8px 12px;
background: rgba(255, 193, 7, 0.1);
border: 1px solid rgba(255, 193, 7, 0.3);
border-radius: 6px;
font-size: 13px;
color: #faad14;
}
/* NOTE(review): duplicate of the `.hint-icon` rule declared earlier in
   this file — safe but redundant; consider removing one copy. */
.hint-icon {
font-size: 14px;
}
</style>