fix: 语音复刻改用前端识别的音频文本,并将硅基流动默认采样率调整为 44100

This commit is contained in:
2026-02-02 02:39:40 +08:00
parent 49c660a6e3
commit 33b748915d
9 changed files with 96 additions and 43 deletions

View File

@@ -82,7 +82,8 @@
"Skill(pptx:*)", "Skill(pptx:*)",
"Bash(pdftoppm:*)", "Bash(pdftoppm:*)",
"Bash(pip install:*)", "Bash(pip install:*)",
"Bash(where:*)" "Bash(where:*)",
"mcp__web-reader__webReader"
], ],
"deny": [], "deny": [],
"ask": [] "ask": []

View File

@@ -22,6 +22,7 @@ export const VoiceService = {
* @param {string} data.language - 语言(可选) * @param {string} data.language - 语言(可选)
* @param {string} data.gender - 音色类型(可选) * @param {string} data.gender - 音色类型(可选)
* @param {string} data.note - 备注(可选) * @param {string} data.note - 备注(可选)
* @param {string} data.text - 音频文本(用于语音复刻,前端通过音频识别获取)
* @param {string} data.providerType - 供应商类型可选cosyvoice-阿里云siliconflow-硅基流动 * @param {string} data.providerType - 供应商类型可选cosyvoice-阿里云siliconflow-硅基流动
* @returns {Promise} * @returns {Promise}
*/ */

View File

@@ -133,28 +133,23 @@ export function useUpload() {
fileType: file.type, fileType: file.type,
groupId, groupId,
coverBase64, coverBase64,
duration: file.type.startsWith('video/') ? null : undefined // 视频时长由后端处理或前端可选传递 duration: file.type.startsWith('video/') ? null : undefined
}) })
// 设置成功状态
state.uploading = false state.uploading = false
state.status = 'success' state.status = 'success'
state.progress = 100 state.progress = 100
// 通知成功
const fileId = completeData.data?.infraFileId || completeData.data?.userFileId const fileId = completeData.data?.infraFileId || completeData.data?.userFileId
onSuccess && onSuccess(fileId) const fileUrl = presignedData.data.presignedUrl
onSuccess && onSuccess(fileId, fileUrl)
return fileId return fileId
} catch (error) { } catch (error) {
// 设置错误状态
state.uploading = false state.uploading = false
state.status = 'error' state.status = 'error'
state.error = error.message || '上传失败' state.error = error.message || '上传失败'
// 通知错误
onError && onError(error) onError && onError(error)
throw error throw error
} }
} }

View File

@@ -111,6 +111,7 @@ import { PlusOutlined, SearchOutlined, UploadOutlined, PlayCircleOutlined } from
import { VoiceService } from '@/api/voice' import { VoiceService } from '@/api/voice'
import { MaterialService } from '@/api/material' import { MaterialService } from '@/api/material'
import { useUpload } from '@/composables/useUpload' import { useUpload } from '@/composables/useUpload'
import useVoiceText from '@gold/hooks/web/useVoiceText'
import dayjs from 'dayjs' import dayjs from 'dayjs'
import BasicLayout from '@/layouts/components/BasicLayout.vue' import BasicLayout from '@/layouts/components/BasicLayout.vue'
@@ -123,7 +124,9 @@ const DEFAULT_FORM_DATA = {
autoTranscribe: true, autoTranscribe: true,
language: 'zh-CN', language: 'zh-CN',
gender: 'female', gender: 'female',
note: '' note: '',
text: '', // 音频文本
fileUrl: '' // 文件URL用于获取音频文本
} }
// ========== 响应式数据 ========== // ========== 响应式数据 ==========
@@ -155,6 +158,9 @@ const formData = reactive({ ...DEFAULT_FORM_DATA })
// ========== Upload Hook ========== // ========== Upload Hook ==========
const { state: uploadState, upload } = useUpload() const { state: uploadState, upload } = useUpload()
// ========== VoiceText Hook ==========
const { getVoiceText } = useVoiceText()
// ========== 计算属性 ========== // ========== 计算属性 ==========
const isCreateMode = computed(() => formMode.value === 'create') const isCreateMode = computed(() => formMode.value === 'create')
@@ -307,13 +313,16 @@ const handleCustomUpload = async (options) => {
try { try {
const fileId = await upload(file, { const fileId = await upload(file, {
fileCategory: 'voice', fileCategory: 'voice',
groupId: null, // 配音模块不使用groupId groupId: null,
coverBase64: null, coverBase64: null,
onStart: () => {}, onStart: () => {},
onProgress: () => {}, onProgress: () => {},
onSuccess: (id) => { onSuccess: async (id, fileUrl) => {
formData.fileId = id formData.fileId = id
formData.fileUrl = fileUrl // 保存文件URL
message.success('文件上传成功') message.success('文件上传成功')
// 通过fileId获取播放URL用于语音识别
await fetchAudioTextById(id)
onSuccess?.({ code: 0, data: id }, file) onSuccess?.({ code: 0, data: id }, file)
}, },
onError: (error) => { onError: (error) => {
@@ -330,12 +339,51 @@ const handleCustomUpload = async (options) => {
} }
} }
/**
 * Looks up the audio play URL for an uploaded file and runs speech
 * recognition on it, storing the recognized text in `formData.text`.
 *
 * Best-effort: any failure is logged and never rethrown, so a recognition
 * problem does not propagate to the caller.
 *
 * @param {*} fileId - ID of the uploaded file; falsy values are ignored.
 * @returns {Promise<void>}
 */
const fetchAudioTextById = async (fileId) => {
  if (!fileId) return

  // Drop text recognized for a previously uploaded file up front, so a failed
  // or empty recognition cannot leave a stale transcript paired with this file.
  formData.text = ''

  try {
    // Resolve the audio play URL for this file.
    const res = await MaterialService.getAudioPlayUrl(fileId)
    if (res.code !== 0 || !res.data) return

    const results = await getVoiceText([{ audio_url: res.data }])
    if (!results || results.length === 0) return

    // Fall back to '' so the form field never becomes undefined/null.
    const text = results[0]?.value ?? ''
    formData.text = text
    if (text) {
      message.success('音频文本获取成功')
    }
  } catch (error) {
    console.error('获取音频文本失败:', error)
  }
}
/**
 * Runs speech recognition on an audio file URL and stores the recognized
 * text in `formData.text`.
 *
 * Best-effort: any failure is logged and never rethrown.
 *
 * @param {string} fileUrl - URL of the audio file; falsy values are ignored.
 * @returns {Promise<void>}
 */
const fetchAudioText = async (fileUrl) => {
  if (!fileUrl) return

  // Drop text recognized for a previously uploaded file up front, so a failed
  // or empty recognition cannot leave a stale transcript paired with this file.
  formData.text = ''

  try {
    // The Aliyun speech-recognition service cannot access presigned URLs,
    // so the raw URL is used instead.
    const rawFileUrl = extractRawUrl(fileUrl)
    const results = await getVoiceText([{ audio_url: rawFileUrl }])
    if (!results || results.length === 0) return

    // Fall back to '' so the form field never becomes undefined/null.
    const text = results[0]?.value ?? ''
    formData.text = text
    if (text) {
      message.success('音频文本获取成功')
    }
  } catch (error) {
    console.error('获取音频文本失败:', error)
  }
}
const handleFileListChange = (info) => { const handleFileListChange = (info) => {
// 处理文件列表变化,避免直接修改导致 DOM 错误
const { fileList: newFileList } = info const { fileList: newFileList } = info
// 只更新文件列表,不直接修改文件项的状态
// 让组件自己管理状态
if (newFileList) { if (newFileList) {
fileList.value = newFileList.filter(item => item.status !== 'removed') fileList.value = newFileList.filter(item => item.status !== 'removed')
} }
@@ -363,7 +411,8 @@ const handleSubmit = async () => {
autoTranscribe: formData.autoTranscribe, autoTranscribe: formData.autoTranscribe,
language: formData.language, language: formData.language,
gender: formData.gender, gender: formData.gender,
note: formData.note note: formData.note,
text: formData.text // 传入音频文本
} }
: { : {
id: formData.id, id: formData.id,

View File

@@ -58,7 +58,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
sfRequest.setText(getOrDefault(request.getTranscriptionText(), config.getPreviewText())); sfRequest.setText(getOrDefault(request.getTranscriptionText(), config.getPreviewText()));
sfRequest.setAudio(AUDIO_MIME_TYPE + base64Audio); sfRequest.setAudio(AUDIO_MIME_TYPE + base64Audio);
// 调用上传参考音频 API
String url = config.getBaseUrl() + config.getVoiceUploadUrl(); String url = config.getBaseUrl() + config.getVoiceUploadUrl();
String requestBody = JSONUtil.toJsonStr(sfRequest); String requestBody = JSONUtil.toJsonStr(sfRequest);
log.debug("[SiliconFlowProvider][请求体]{}", requestBody); log.debug("[SiliconFlowProvider][请求体]{}", requestBody);
@@ -123,7 +122,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
.responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat())) .responseFormat(getOrDefault(request.getAudioFormat(), config.getAudioFormat()))
.build(); .build();
// 调用文本转语音 API
String url = config.getBaseUrl() + config.getTtsUrl(); String url = config.getBaseUrl() + config.getTtsUrl();
String requestBody = JSONUtil.toJsonStr(sfRequest); String requestBody = JSONUtil.toJsonStr(sfRequest);
log.debug("[SiliconFlowProvider][请求体]{}", requestBody); log.debug("[SiliconFlowProvider][请求体]{}", requestBody);
@@ -142,7 +140,6 @@ public class SiliconFlowProvider implements VoiceCloneProvider {
throw new RuntimeException("硅基流动文本转语音失败: " + errorBody); throw new RuntimeException("硅基流动文本转语音失败: " + errorBody);
} }
// 硅基流动直接返回二进制音频数据
byte[] audioBytes = response.bodyBytes(); byte[] audioBytes = response.bodyBytes();
String base64Audio = Base64.getEncoder().encodeToString(audioBytes); String base64Audio = Base64.getEncoder().encodeToString(audioBytes);

View File

@@ -36,8 +36,6 @@ public class SiliconFlowTtsRequest {
*/ */
private Float speed; private Float speed;
/** /**
* 响应格式mp3, opus, wav, pcmAPI 参数名response_format * 响应格式mp3, opus, wav, pcmAPI 参数名response_format
*/ */

View File

@@ -32,8 +32,11 @@ public class SiliconFlowProviderConfig extends VoiceProviderProperties.ProviderC
/** /**
* 默认采样率 * 默认采样率
* <p>mp3: 32000, 44100 (默认 44100)</p>
* <p>opus: 48000</p>
* <p>wav/pcm: 8000, 16000, 24000, 32000, 44100 (默认 44100)</p>
*/ */
private Integer sampleRate = 24000; private Integer sampleRate = 44100;
/** /**
* 默认音频格式 * 默认音频格式

View File

@@ -144,34 +144,38 @@ public class TikUserVoiceServiceImpl implements TikUserVoiceService {
.setLanguage(StrUtil.blankToDefault(createReqVO.getLanguage(), "zh-CN")) .setLanguage(StrUtil.blankToDefault(createReqVO.getLanguage(), "zh-CN"))
.setGender(StrUtil.blankToDefault(createReqVO.getGender(), "female")) .setGender(StrUtil.blankToDefault(createReqVO.getGender(), "female"))
.setNote(createReqVO.getNote()) .setNote(createReqVO.getNote())
.setTranscription(null); // 初始为空,表示未识别 .setTranscription(createReqVO.getText()); // 使用前端传入的文本
voiceMapper.insert(voice); voiceMapper.insert(voice);
// 4. 调用语音克隆服务,生成 voice_id // 4. 调用语音克隆服务,生成 voice_id
try { if (StrUtil.isNotBlank(createReqVO.getText())) {
log.info("[createVoice][开始语音复刻,配音编号({})文件ID({}),供应商({})]", try {
voice.getId(), fileDO.getId(), createReqVO.getProviderType()); log.info("[createVoice][开始语音复刻,配音编号({})文件ID({}),供应商({})]",
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS); voice.getId(), fileDO.getId(), createReqVO.getProviderType());
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType()); VoiceCloneProvider provider = voiceProviderFactory.getProvider(createReqVO.getProviderType());
String providerType = getProviderType(createReqVO.getProviderType(), provider); String providerType = getProviderType(createReqVO.getProviderType(), provider);
String model = getModelByProvider(providerType); String model = getModelByProvider(providerType);
VoiceCloneRequest cloneRequest = new VoiceCloneRequest(); VoiceCloneRequest cloneRequest = new VoiceCloneRequest();
cloneRequest.setAudioUrl(fileAccessUrl); cloneRequest.setAudioUrl(fileAccessUrl);
cloneRequest.setModel(model); cloneRequest.setModel(model);
cloneRequest.setPrefix("voice" + voice.getId()); cloneRequest.setPrefix("voice" + voice.getId());
cloneRequest.setTranscriptionText(voice.getTranscription()); cloneRequest.setTranscriptionText(createReqVO.getText()); // 使用前端传入的文本
VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest); VoiceCloneResult cloneResult = provider.cloneVoice(cloneRequest);
String voiceId = cloneResult.getVoiceId(); String voiceId = cloneResult.getVoiceId();
voice.setVoiceId(voiceId); voice.setVoiceId(voiceId);
voiceMapper.updateById(voice); voiceMapper.updateById(voice);
log.info("[createVoice][语音复刻成功,配音编号({})voice_id({})]", voice.getId(), voiceId); log.info("[createVoice][语音复刻成功,配音编号({})voice_id({})]", voice.getId(), voiceId);
} catch (Exception e) { } catch (Exception e) {
log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e); log.error("[createVoice][语音复刻失败,配音编号({}),错误信息: {}]", voice.getId(), e.getMessage(), e);
}
} else {
log.info("[createVoice][未提供文本,跳过语音复刻,配音编号({})]", voice.getId());
} }

View File

@@ -3,6 +3,7 @@ package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.NotNull;
import jakarta.validation.constraints.Size;
import lombok.Data; import lombok.Data;
/** /**
@@ -34,6 +35,10 @@ public class AppTikUserVoiceCreateReqVO {
@Schema(description = "备注", example = "这是一个测试配音") @Schema(description = "备注", example = "这是一个测试配音")
private String note; private String note;
@Schema(description = "音频文本(用于语音复刻,前端通过音频识别获取)")
@Size(max = 4000, message = "音频文本不能超过 4000 个字符")
private String text;
@Schema(description = "供应商类型cosyvoice-阿里云siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice") @Schema(description = "供应商类型cosyvoice-阿里云siliconflow-硅基流动(不传则使用默认)", example = "cosyvoice")
private String providerType; private String providerType;