send-stream

This commit is contained in:
wing
2025-11-19 00:15:18 +08:00
parent 33abc33b58
commit eee3206e90
31 changed files with 3000 additions and 0 deletions

View File

@@ -0,0 +1,178 @@
package cn.iocoder.yudao.module.tik.voice.client;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.framework.common.exception.ServiceException;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.springframework.stereotype.Component;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
import static cn.iocoder.yudao.module.tik.enmus.ErrorCodeConstants.VOICE_TTS_FAILED;
/**
 * HTTP client for the DashScope CosyVoice text-to-speech (TTS) service.
 *
 * <p>Serializes a {@link CosyVoiceTtsRequest} into the DashScope JSON payload,
 * posts it to the configured endpoint, and decodes the Base64 audio in the
 * response into a {@link CosyVoiceTtsResult}.
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class CosyVoiceClient {

    /** JSON media type used for all request bodies. */
    private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");

    private final CosyVoiceProperties properties;
    private final ObjectMapper objectMapper;

    // Lazily created, shared OkHttp client; volatile for double-checked locking in getHttpClient().
    private volatile OkHttpClient httpClient;

    /**
     * Calls the CosyVoice TTS endpoint synchronously.
     *
     * @param request TTS request; {@code text} is mandatory
     * @return decoded audio plus metadata (format, sample rate, request id, voice id)
     * @throws ServiceException with {@code VOICE_TTS_FAILED} when the client is not
     *         configured, the text is blank, or the remote call fails
     */
    public CosyVoiceTtsResult synthesize(CosyVoiceTtsRequest request) {
        if (!properties.isEnabled()) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
        }
        if (request == null || StrUtil.isBlank(request.getText())) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
        }
        try {
            String payload = objectMapper.writeValueAsString(buildPayload(request));
            Request httpRequest = new Request.Builder()
                    .url(properties.getTtsUrl())
                    .addHeader("Authorization", "Bearer " + properties.getApiKey())
                    .addHeader("Content-Type", "application/json")
                    .post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
                    .build();
            try (Response response = getHttpClient().newCall(httpRequest).execute()) {
                String body = response.body() != null ? response.body().string() : "";
                if (!response.isSuccessful()) {
                    log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
                    throw buildException(body);
                }
                return parseTtsResult(body, request);
            }
        } catch (ServiceException ex) {
            // Business exceptions (including those built above) pass through unchanged.
            throw ex;
        } catch (Exception ex) {
            log.error("[CosyVoice][TTS异常]", ex);
            throw exception(VOICE_TTS_FAILED);
        }
    }

    /**
     * Builds the DashScope request payload, falling back to the configured defaults
     * for model, voice, sample rate and audio format.
     */
    private Map<String, Object> buildPayload(CosyVoiceTtsRequest request) {
        Map<String, Object> payload = new HashMap<>();
        String model = StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel());
        payload.put("model", model);
        Map<String, Object> input = new HashMap<>();
        input.put("text", request.getText());
        String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
        if (StrUtil.isNotBlank(voiceId)) {
            input.put("voice", voiceId);
        }
        payload.put("input", input);
        Map<String, Object> parameters = new HashMap<>();
        int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate();
        parameters.put("sample_rate", sampleRate);
        String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat());
        parameters.put("format", format);
        if (request.getSpeechRate() != null) {
            parameters.put("speech_rate", request.getSpeechRate());
        }
        if (request.getVolume() != null) {
            parameters.put("volume", request.getVolume());
        }
        if (request.isPreview()) {
            // Marks preview calls so the server side can rate-limit them separately.
            parameters.put("preview", true);
        }
        payload.put("parameters", parameters);
        return payload;
    }

    /**
     * Parses a successful (HTTP 2xx) response body into a {@link CosyVoiceTtsResult}.
     * Format / sample rate / voice fall back to the request values (and then the
     * configured defaults) when the response omits them.
     */
    private CosyVoiceTtsResult parseTtsResult(String body, CosyVoiceTtsRequest request) throws Exception {
        JsonNode root = objectMapper.readTree(body);
        // Error responses carry a "code" field even with an HTTP success status.
        if (root.has("code")) {
            String message = root.has("message") ? root.get("message").asText() : body;
            log.error("[CosyVoice][TTS失败][code={}, message={}]", root.get("code").asText(), message);
            throw exception0(VOICE_TTS_FAILED.getCode(), message);
        }
        JsonNode audioNode = root.path("output").path("audio");
        if (!audioNode.isArray() || audioNode.isEmpty()) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回的音频为空");
        }
        JsonNode firstAudio = audioNode.get(0);
        String content = firstAudio.path("content").asText();
        if (StrUtil.isBlank(content)) {
            throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频内容");
        }
        byte[] audioBytes = Base64.getDecoder().decode(content);
        CosyVoiceTtsResult result = new CosyVoiceTtsResult();
        result.setAudio(audioBytes);
        result.setFormat(firstAudio.path("format").asText(StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat())));
        result.setSampleRate(firstAudio.path("sample_rate").asInt(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate()));
        result.setRequestId(root.path("request_id").asText());
        result.setVoiceId(firstAudio.path("voice").asText(request.getVoiceId()));
        return result;
    }

    /**
     * Returns the lazily-initialized shared OkHttp client (double-checked locking
     * on the volatile {@link #httpClient} field).
     */
    private OkHttpClient getHttpClient() {
        if (httpClient == null) {
            synchronized (this) {
                if (httpClient == null) {
                    // Use the imported Duration type (was fully-qualified java.time.Duration).
                    Duration connect = defaultDuration(properties.getConnectTimeout(), 10);
                    Duration read = defaultDuration(properties.getReadTimeout(), 60);
                    httpClient = new OkHttpClient.Builder()
                            .connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS)
                            .readTimeout(read.toMillis(), TimeUnit.MILLISECONDS)
                            .build();
                }
            }
        }
        return httpClient;
    }

    /** Returns {@code duration}, or a {@code seconds}-second default when it is null. */
    private Duration defaultDuration(Duration duration, long seconds) {
        return duration == null ? Duration.ofSeconds(seconds) : duration;
    }

    /**
     * Builds a {@link ServiceException} from an error response body, preferring the
     * top-level "message" field and then "output.message".
     *
     * <p>FIX: the previous implementation used {@code CollUtil.getFirst} over a list
     * of possibly-null values, which returns the first LIST element (often null)
     * rather than the first non-blank message — so "output.message" was never used.
     */
    private ServiceException buildException(String body) {
        try {
            JsonNode root = objectMapper.readTree(body);
            String message = root.path("message").asText(null);
            if (StrUtil.isBlank(message)) {
                message = root.path("output").path("message").asText(null);
            }
            return exception0(VOICE_TTS_FAILED.getCode(), StrUtil.blankToDefault(message, "CosyVoice 调用失败"));
        } catch (Exception ignored) {
            // Body is not JSON; surface it verbatim.
            return exception0(VOICE_TTS_FAILED.getCode(), body);
        }
    }
}

View File

@@ -0,0 +1,141 @@
package cn.iocoder.yudao.module.tik.voice.client;
import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.framework.common.exception.ServiceException;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitResponse;
import cn.iocoder.yudao.module.tik.voice.config.LatentsyncProperties;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.springframework.stereotype.Component;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
import static cn.iocoder.yudao.module.tik.enmus.ErrorCodeConstants.LATENTSYNC_SUBMIT_FAILED;
/**
 * HTTP client for the 302.AI "Latentsync" lip-sync API.
 *
 * <p>Submits an (audio URL, video URL) pair to the configured submit endpoint
 * and returns the queued task descriptor.
 */
@Slf4j
@Component
@RequiredArgsConstructor
public class LatentsyncClient {

    /** JSON media type used for all request bodies. */
    private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");

    private final LatentsyncProperties properties;
    private final ObjectMapper objectMapper;

    // Lazily created, shared OkHttp client; volatile for double-checked locking in getHttpClient().
    private volatile OkHttpClient httpClient;

    /**
     * Submits a lip-sync task.
     *
     * @param request audio/video URLs plus optional guidance scale and seed
     * @return the submit response; {@code requestId} is guaranteed non-blank
     * @throws ServiceException with {@code LATENTSYNC_SUBMIT_FAILED} when the client
     *         is disabled, the request is invalid, or the remote call fails
     */
    public LatentsyncSubmitResponse submitTask(LatentsyncSubmitRequest request) {
        if (!properties.isEnabled()) {
            throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "未配置 Latentsync API Key");
        }
        validateRequest(request);
        Map<String, Object> payload = buildPayload(request);
        try {
            String body = objectMapper.writeValueAsString(payload);
            Request httpRequest = new Request.Builder()
                    .url(properties.getSubmitUrl())
                    .addHeader("Authorization", "Bearer " + properties.getApiKey())
                    .addHeader("Content-Type", "application/json")
                    .post(RequestBody.create(body.getBytes(StandardCharsets.UTF_8), JSON))
                    .build();
            try (Response response = getHttpClient().newCall(httpRequest).execute()) {
                String responseBody = response.body() != null ? response.body().string() : "";
                if (!response.isSuccessful()) {
                    log.error("[Latentsync][submit failed][status={}, body={}]", response.code(), responseBody);
                    throw buildException(responseBody);
                }
                LatentsyncSubmitResponse submitResponse =
                        objectMapper.readValue(responseBody, LatentsyncSubmitResponse.class);
                // A missing requestId means the task was not actually queued.
                if (StrUtil.isBlank(submitResponse.getRequestId())) {
                    log.error("[Latentsync][submit failed][response={}]", responseBody);
                    throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "Latentsync 返回 requestId 为空");
                }
                return submitResponse;
            }
        } catch (ServiceException ex) {
            // Business exceptions (including those built above) pass through unchanged.
            throw ex;
        } catch (Exception ex) {
            log.error("[Latentsync][submit exception]", ex);
            throw exception(LATENTSYNC_SUBMIT_FAILED);
        }
    }

    /** Validates mandatory fields and the guidanceScale range (1-2). */
    private void validateRequest(LatentsyncSubmitRequest request) {
        if (request == null) {
            throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "请求体不能为空");
        }
        if (StrUtil.isBlank(request.getAudioUrl())) {
            throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "音频地址不能为空");
        }
        if (StrUtil.isBlank(request.getVideoUrl())) {
            throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "视频地址不能为空");
        }
        Integer scale = request.getGuidanceScale();
        if (scale != null && (scale < 1 || scale > 2)) {
            throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "guidanceScale 取值范围 1-2");
        }
    }

    /** Builds the JSON payload, applying configured defaults for guidance scale and seed. */
    private Map<String, Object> buildPayload(LatentsyncSubmitRequest request) {
        Map<String, Object> payload = new HashMap<>();
        payload.put("audio_url", request.getAudioUrl());
        payload.put("video_url", request.getVideoUrl());
        Integer scale = request.getGuidanceScale() != null
                ? request.getGuidanceScale() : properties.getDefaultGuidanceScale();
        payload.put("guidance_scale", scale);
        Integer seed = request.getSeed() != null ? request.getSeed() : properties.getDefaultSeed();
        payload.put("seed", seed);
        return payload;
    }

    /**
     * Returns the lazily-initialized shared OkHttp client (double-checked locking
     * on the volatile {@link #httpClient} field).
     */
    private OkHttpClient getHttpClient() {
        if (httpClient == null) {
            synchronized (this) {
                if (httpClient == null) {
                    Duration connect = defaultDuration(properties.getConnectTimeout(), 10);
                    Duration read = defaultDuration(properties.getReadTimeout(), 60);
                    httpClient = new OkHttpClient.Builder()
                            .connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS)
                            .readTimeout(read.toMillis(), TimeUnit.MILLISECONDS)
                            .build();
                }
            }
        }
        return httpClient;
    }

    /** Returns {@code duration}, or a {@code seconds}-second default when it is null. */
    private Duration defaultDuration(Duration duration, long seconds) {
        return duration == null ? Duration.ofSeconds(seconds) : duration;
    }

    /** Extracts "message" from an error body, falling back to the raw body text. */
    private ServiceException buildException(String body) {
        try {
            JsonNode root = objectMapper.readTree(body);
            String message = root.path("message").asText(body);
            return exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), message);
        } catch (Exception ignored) {
            // Body is not JSON; surface it verbatim.
            return exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), body);
        }
    }
}

View File

@@ -0,0 +1,54 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Builder;
import lombok.Data;
/**
 * CosyVoice TTS request.
 */
@Data
@Builder
public class CosyVoiceTtsRequest {

    /**
     * Text to synthesize (mandatory).
     */
    private String text;

    /**
     * Voice id; optional, falls back to the configured default.
     */
    private String voiceId;

    /**
     * Model name; optional (defaults to "cosyvoice-v2").
     */
    private String model;

    /**
     * Speech rate; optional.
     */
    private Float speechRate;

    /**
     * Volume; optional.
     */
    private Float volume;

    /**
     * Sample rate; optional, falls back to the configured default.
     */
    private Integer sampleRate;

    /**
     * Audio format; optional, falls back to the configured default.
     */
    private String audioFormat;

    /**
     * Whether this is a preview-only call, so the server side can rate-limit it.
     */
    private boolean preview;
}

View File

@@ -0,0 +1,37 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
/**
 * CosyVoice TTS response.
 */
@Data
public class CosyVoiceTtsResult {

    /**
     * Remote request id.
     */
    private String requestId;

    /**
     * Audio format returned by the service.
     */
    private String format;

    /**
     * Sample rate of the returned audio.
     */
    private Integer sampleRate;

    /**
     * Raw (decoded) audio bytes.
     */
    private byte[] audio;

    /**
     * Voice id the audio was synthesized with.
     */
    private String voiceId;
}

View File

@@ -0,0 +1,34 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Builder;
import lombok.Data;
/**
 * Latentsync task submit request.
 */
@Data
@Builder
public class LatentsyncSubmitRequest {

    /**
     * Audio URL (mandatory).
     */
    private String audioUrl;

    /**
     * Video URL (mandatory).
     */
    private String videoUrl;

    /**
     * Lip-sync guidance strength; valid range 1-2 (validated by the client).
     */
    private Integer guidanceScale;

    /**
     * Random seed; optional, falls back to the configured default.
     */
    private Integer seed;
}

View File

@@ -0,0 +1,39 @@
package cn.iocoder.yudao.module.tik.voice.client.dto;
import lombok.Data;
import java.util.Map;
/**
 * Latentsync task submit response.
 *
 * <p>NOTE(review): the remote API looks like it returns snake_case fields
 * (e.g. {@code request_id}); confirm the shared {@code ObjectMapper} uses a
 * snake_case naming strategy, otherwise {@code requestId}/{@code queuePosition}
 * will not be populated and the client's requestId check will always fail.
 */
@Data
public class LatentsyncSubmitResponse {

    /**
     * Log content (not returned by the API yet; reserved).
     */
    private Object logs;

    /**
     * Metrics information.
     */
    private Map<String, Object> metrics;

    /**
     * Position in the processing queue.
     */
    private Integer queuePosition;

    /**
     * Task id; the client treats a blank value as a failed submit.
     */
    private String requestId;

    /**
     * Current task status.
     */
    private String status;
}

View File

@@ -0,0 +1,74 @@
package cn.iocoder.yudao.module.tik.voice.config;
import cn.hutool.core.util.StrUtil;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.time.Duration;
/**
 * CosyVoice (DashScope) configuration, bound from the "yudao.cosyvoice" prefix.
 */
@Data
@Component
@ConfigurationProperties(prefix = "yudao.cosyvoice")
public class CosyVoiceProperties {

    /**
     * DashScope API Key; the client is effectively disabled while this is blank.
     */
    private String apiKey;

    /**
     * Default model name.
     */
    private String defaultModel = "cosyvoice-v2";

    /**
     * Default voice id; optional.
     */
    private String defaultVoiceId;

    /**
     * Default sample rate.
     */
    private Integer sampleRate = 24000;

    /**
     * Default audio format.
     */
    private String audioFormat = "wav";

    /**
     * Default sample text used for voice previews.
     */
    private String previewText = "您好,欢迎体验专属音色。";

    /**
     * TTS endpoint URL.
     */
    private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";

    /**
     * HTTP connect timeout.
     */
    private Duration connectTimeout = Duration.ofSeconds(10);

    /**
     * HTTP read timeout.
     */
    private Duration readTimeout = Duration.ofSeconds(60);

    /**
     * Feature switch.
     */
    private boolean enabled = true;

    /**
     * Returns true only when the feature is switched on AND an API key is configured.
     */
    public boolean isEnabled() {
        return enabled && StrUtil.isNotBlank(apiKey);
    }
}

View File

@@ -0,0 +1,78 @@
package cn.iocoder.yudao.module.tik.voice.config;
import cn.hutool.core.util.StrUtil;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
import java.time.Duration;
/**
 * Latentsync (302.AI) configuration, bound from the "tik.latentsync" prefix.
 */
@Data
@Component
@ConfigurationProperties(prefix = "tik.latentsync")
public class LatentsyncProperties {

    /**
     * 302AI API Key.
     *
     * <p>SECURITY FIX: must be supplied via external configuration
     * ({@code tik.latentsync.api-key}); the previous hard-coded default leaked a
     * live credential into source control. While no key is configured,
     * {@link #isEnabled()} returns {@code false} and calls are rejected.
     */
    private String apiKey;

    /**
     * Default (overseas) gateway.
     */
    private String baseUrl = "https://api.302.ai";

    /**
     * Domestic relay gateway.
     */
    private String domesticBaseUrl = "https://api.302ai.cn";

    /**
     * Whether to prefer the domestic gateway.
     */
    private boolean preferDomestic = false;

    /**
     * Submit-task path.
     */
    private String submitPath = "/302/submit/latentsync";

    /**
     * Default guidance_scale value (valid range 1-2).
     */
    private Integer defaultGuidanceScale = 1;

    /**
     * Default random seed.
     */
    private Integer defaultSeed = 8888;

    /**
     * HTTP connect timeout.
     */
    private Duration connectTimeout = Duration.ofSeconds(10);

    /**
     * HTTP read timeout.
     */
    private Duration readTimeout = Duration.ofSeconds(60);

    /**
     * Feature switch.
     */
    private boolean enabled = true;

    /**
     * Returns the full submit URL: the preferred gateway (falling back to
     * {@link #baseUrl} when the preferred one is blank) plus the submit path.
     */
    public String getSubmitUrl() {
        String base = preferDomestic ? domesticBaseUrl : baseUrl;
        return StrUtil.blankToDefault(base, baseUrl) + submitPath;
    }

    /**
     * Returns true only when the feature is switched on AND an API key is configured.
     */
    public boolean isEnabled() {
        return enabled && StrUtil.isNotBlank(apiKey);
    }
}

View File

@@ -0,0 +1,38 @@
package cn.iocoder.yudao.module.tik.voice.controller;
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
import cn.iocoder.yudao.module.tik.voice.service.LatentsyncService;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.annotation.Resource;
import jakarta.validation.Valid;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
/**
 * User App - Latentsync lip-sync controller.
 */
@Tag(name = "用户 App - Latentsync 口型同步")
@RestController
@RequestMapping("/api/tik/latentsync")
@Validated
public class AppTikLatentsyncController {

    @Resource
    private LatentsyncService latentsyncService;

    /**
     * Submits a 302AI Latentsync lip-sync task and returns the queued task info.
     */
    @PostMapping("/submit")
    @Operation(summary = "提交 302AI Latentsync 口型任务")
    public CommonResult<AppTikLatentsyncSubmitRespVO> submitTask(@Valid @RequestBody AppTikLatentsyncSubmitReqVO reqVO) {
        return success(latentsyncService.submitTask(reqVO));
    }
}

View File

@@ -0,0 +1,95 @@
package cn.iocoder.yudao.module.tik.voice.controller;
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
import cn.iocoder.yudao.framework.common.pojo.PageResult;
import cn.iocoder.yudao.module.tik.voice.service.TikUserVoiceService;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceUpdateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsRespVO;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.annotation.Resource;
import jakarta.validation.Valid;
import lombok.extern.slf4j.Slf4j;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
/**
 * User App - voice-over management controller: CRUD, speech recognition,
 * CosyVoice TTS and voice preview.
 *
 * @author 芋道源码
 */
@Tag(name = "用户 App - 配音管理")
@RestController
@RequestMapping("/api/tik/voice")
@Validated
@Slf4j
public class AppTikUserVoiceController {

    @Resource
    private TikUserVoiceService voiceService;

    /** Creates a voice-over record; returns the new record id. */
    @PostMapping("/create")
    @Operation(summary = "创建配音")
    public CommonResult<Long> createVoice(@Valid @RequestBody AppTikUserVoiceCreateReqVO createReqVO) {
        return success(voiceService.createVoice(createReqVO));
    }

    /** Updates a voice-over record (partial update). */
    @PutMapping("/update")
    @Operation(summary = "更新配音")
    public CommonResult<Boolean> updateVoice(@Valid @RequestBody AppTikUserVoiceUpdateReqVO updateReqVO) {
        voiceService.updateVoice(updateReqVO);
        return success(true);
    }

    /** Deletes a voice-over record (and its backing file). */
    @DeleteMapping("/delete")
    @Operation(summary = "删除配音")
    @Parameter(name = "id", description = "配音编号", required = true, example = "1")
    public CommonResult<Boolean> deleteVoice(@RequestParam("id") Long id) {
        voiceService.deleteVoice(id);
        return success(true);
    }

    /** Pages the current user's voice-over records. */
    @GetMapping("/page")
    @Operation(summary = "分页查询配音列表")
    public CommonResult<PageResult<AppTikUserVoiceRespVO>> getVoicePage(@Valid AppTikUserVoicePageReqVO pageReqVO) {
        return success(voiceService.getVoicePage(pageReqVO));
    }

    /** Fetches a single voice-over record. */
    @GetMapping("/get")
    @Operation(summary = "获取单个配音")
    @Parameter(name = "id", description = "配音编号", required = true, example = "1")
    public CommonResult<AppTikUserVoiceRespVO> getVoice(@RequestParam("id") Long id) {
        return success(voiceService.getVoice(id));
    }

    /** Manually triggers speech recognition for a voice-over record. */
    @PostMapping("/transcribe")
    @Operation(summary = "手动触发语音识别")
    @Parameter(name = "id", description = "配音编号", required = true, example = "1")
    public CommonResult<Boolean> transcribeVoice(@RequestParam("id") Long id) {
        voiceService.transcribeVoice(id);
        return success(true);
    }

    /** Synthesizes speech from text via CosyVoice. */
    @PostMapping("/tts")
    @Operation(summary = "CosyVoice 文本转语音")
    public CommonResult<AppTikVoiceTtsRespVO> synthesizeVoice(@Valid @RequestBody AppTikVoiceTtsReqVO reqVO) {
        return success(voiceService.synthesizeVoice(reqVO));
    }

    /** Generates a preview sample of the user's voice. */
    @PostMapping("/preview")
    @Operation(summary = "我的音色试听")
    public CommonResult<AppTikVoicePreviewRespVO> previewVoice(@Valid @RequestBody AppTikVoicePreviewReqVO reqVO) {
        return success(voiceService.previewVoice(reqVO));
    }
}

View File

@@ -0,0 +1,59 @@
package cn.iocoder.yudao.module.tik.voice.dal.dataobject;
import cn.iocoder.yudao.framework.tenant.core.db.TenantBaseDO;
import com.baomidou.mybatisplus.annotation.KeySequence;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.*;
/**
 * User voice-over DO, mapped to table {@code tik_user_voice}.
 *
 * @author 芋道源码
 */
@TableName("tik_user_voice")
@KeySequence("tik_user_voice_seq") // Primary-key sequence for Oracle/PostgreSQL/Kingbase/DB2/H2; unnecessary for MySQL-style auto-increment.
@Data
@EqualsAndHashCode(callSuper = true)
@ToString(callSuper = true)
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TikUserVoiceDO extends TenantBaseDO {

    /**
     * Voice-over id (primary key).
     */
    @TableId
    private Long id;

    /**
     * Owning user id.
     */
    private Long userId;

    /**
     * Voice-over display name.
     */
    private String name;

    /**
     * Audio file id (references infra_file.id).
     */
    private Long fileId;

    /**
     * Speech-recognition transcript; null/empty means not yet transcribed.
     */
    private String transcription;

    /**
     * Language: zh-CN (Simplified Chinese), zh-TW (Traditional Chinese), en-US (English).
     */
    private String language;

    /**
     * Voice gender: female, male.
     */
    private String gender;

    /**
     * Free-form note.
     */
    private String note;
}

View File

@@ -0,0 +1,26 @@
package cn.iocoder.yudao.module.tik.voice.dal.mysql;
import cn.iocoder.yudao.framework.common.pojo.PageResult;
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
import org.apache.ibatis.annotations.Mapper;
/**
 * User voice-over Mapper.
 *
 * @author 芋道源码
 */
@Mapper
public interface TikUserVoiceMapper extends BaseMapperX<TikUserVoiceDO> {

    /**
     * Pages voice-over records, filtering by owner (exact) and name (fuzzy)
     * when present, newest record first.
     */
    default PageResult<TikUserVoiceDO> selectPage(AppTikUserVoicePageReqVO reqVO) {
        LambdaQueryWrapperX<TikUserVoiceDO> wrapper = new LambdaQueryWrapperX<>();
        wrapper.eqIfPresent(TikUserVoiceDO::getUserId, reqVO.getUserId());
        wrapper.likeIfPresent(TikUserVoiceDO::getName, reqVO.getName());
        wrapper.orderByDesc(TikUserVoiceDO::getId);
        return selectPage(reqVO, wrapper);
    }
}

View File

@@ -0,0 +1,20 @@
package cn.iocoder.yudao.module.tik.voice.service;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
/**
 * Latentsync lip-sync Service.
 */
public interface LatentsyncService {

    /**
     * Submits a 302AI Latentsync task.
     *
     * @param reqVO request VO (audio/video URLs, optional guidance scale and seed)
     * @return task response (request id, status, queue position)
     */
    AppTikLatentsyncSubmitRespVO submitTask(AppTikLatentsyncSubmitReqVO reqVO);
}

View File

@@ -0,0 +1,42 @@
package cn.iocoder.yudao.module.tik.voice.service;
import cn.hutool.core.util.StrUtil;
import cn.iocoder.yudao.module.tik.voice.client.LatentsyncClient;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitResponse;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
import jakarta.validation.Valid;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.validation.annotation.Validated;
/**
 * Latentsync Service implementation: maps App-layer VOs to the client layer
 * and delegates the submit call to {@link LatentsyncClient}.
 */
@Service
@Validated
@RequiredArgsConstructor
public class LatentsyncServiceImpl implements LatentsyncService {

    private final LatentsyncClient latentsyncClient;

    @Override
    public AppTikLatentsyncSubmitRespVO submitTask(@Valid AppTikLatentsyncSubmitReqVO reqVO) {
        LatentsyncSubmitResponse clientResponse = latentsyncClient.submitTask(toClientRequest(reqVO));
        return toRespVO(clientResponse);
    }

    /** Builds the client-layer request from the App-layer VO, trimming both URLs. */
    private LatentsyncSubmitRequest toClientRequest(AppTikLatentsyncSubmitReqVO reqVO) {
        return LatentsyncSubmitRequest.builder()
                .audioUrl(StrUtil.trim(reqVO.getAudioUrl()))
                .videoUrl(StrUtil.trim(reqVO.getVideoUrl()))
                .guidanceScale(reqVO.getGuidanceScale())
                .seed(reqVO.getSeed())
                .build();
    }

    /** Copies the client-layer response fields into the App-layer response VO. */
    private AppTikLatentsyncSubmitRespVO toRespVO(LatentsyncSubmitResponse response) {
        AppTikLatentsyncSubmitRespVO respVO = new AppTikLatentsyncSubmitRespVO();
        respVO.setRequestId(response.getRequestId());
        respVO.setStatus(response.getStatus());
        respVO.setQueuePosition(response.getQueuePosition());
        return respVO;
    }
}

View File

@@ -0,0 +1,75 @@
package cn.iocoder.yudao.module.tik.voice.service;
import cn.iocoder.yudao.framework.common.pojo.PageResult;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceUpdateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsRespVO;
/**
 * User voice-over Service interface: record CRUD, speech recognition,
 * CosyVoice TTS and voice preview.
 *
 * @author 芋道源码
 */
public interface TikUserVoiceService {

    /**
     * Creates a voice-over record (uploaded file + optional auto transcription).
     *
     * @param createReqVO create request VO
     * @return new voice-over id
     */
    Long createVoice(AppTikUserVoiceCreateReqVO createReqVO);

    /**
     * Updates a voice-over record.
     *
     * @param updateReqVO update request VO
     */
    void updateVoice(AppTikUserVoiceUpdateReqVO updateReqVO);

    /**
     * Deletes a voice-over record.
     *
     * @param id voice-over id
     */
    void deleteVoice(Long id);

    /**
     * Pages voice-over records.
     *
     * @param pageReqVO paging query VO
     * @return voice-over page
     */
    PageResult<AppTikUserVoiceRespVO> getVoicePage(AppTikUserVoicePageReqVO pageReqVO);

    /**
     * Fetches a single voice-over record.
     *
     * @param id voice-over id
     * @return voice-over info
     */
    AppTikUserVoiceRespVO getVoice(Long id);

    /**
     * Manually triggers speech recognition.
     *
     * @param id voice-over id
     */
    void transcribeVoice(Long id);

    /**
     * Synthesizes speech from text via CosyVoice.
     *
     * @param reqVO TTS request VO
     * @return TTS response VO
     */
    AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO);

    /**
     * Generates a preview sample of the user's voice.
     *
     * @param reqVO preview request VO
     * @return preview response VO
     */
    AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO);
}

View File

@@ -0,0 +1,864 @@
package cn.iocoder.yudao.module.tik.voice.service;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpUtil;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
import cn.iocoder.yudao.framework.common.pojo.PageResult;
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUtils;
import cn.iocoder.yudao.module.infra.api.file.FileApi;
import cn.iocoder.yudao.module.infra.dal.dataobject.file.FileDO;
import cn.iocoder.yudao.module.infra.dal.mysql.file.FileMapper;
import cn.iocoder.yudao.module.tik.file.dal.dataobject.TikUserFileDO;
import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
import cn.iocoder.yudao.module.tik.voice.util.ByteArrayMultipartFile;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceUpdateReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewRespVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsReqVO;
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsRespVO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.validation.annotation.Validated;
import jakarta.annotation.Resource;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
import static cn.iocoder.yudao.module.tik.enmus.ErrorCodeConstants.*;
/**
* 用户配音 Service 实现类
*
* @author 芋道源码
*/
@Service
@Validated
@Slf4j
public class TikUserVoiceServiceImpl implements TikUserVoiceService {
@Resource
private TikUserVoiceMapper voiceMapper;
@Resource
private FileMapper fileMapper;
@Resource
private TikUserFileMapper userFileMapper;
@Resource
private TikUserFileService tikUserFileService;
@Resource
private FileApi fileApi;
@Resource
private TikHupService tikHupService;
@Resource
private CosyVoiceClient cosyVoiceClient;
@Resource
private CosyVoiceProperties cosyVoiceProperties;
@Resource
private StringRedisTemplate stringRedisTemplate;
/** 预签名URL过期时间1小时单位 */
private static final int PRESIGN_URL_EXPIRATION_SECONDS = 3600;
private static final String PREVIEW_CACHE_PREFIX = "tik:voice:preview:";
private static final String SYNTH_CACHE_PREFIX = "tik:voice:tts:";
private static final long PREVIEW_CACHE_TTL_SECONDS = 3600;
private static final long SYNTH_CACHE_TTL_SECONDS = 24 * 3600;
    @Override
    @Transactional(rollbackFor = Exception.class)
    public Long createVoice(AppTikUserVoiceCreateReqVO createReqVO) {
        Long userId = SecurityFrameworkUtils.getLoginUserId();
        // 1. The referenced file must exist
        FileDO fileDO = fileMapper.selectById(createReqVO.getFileId());
        if (fileDO == null) {
            throw exception(VOICE_FILE_NOT_EXISTS);
        }
        // Verify via tik_user_file that the file belongs to this user and to the "voice" category
        TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX<TikUserFileDO>()
                .eq(TikUserFileDO::getFileId, createReqVO.getFileId())
                .eq(TikUserFileDO::getFileCategory, "voice")
                .eq(TikUserFileDO::getUserId, userId));
        if (userFile == null) {
            throw exception(VOICE_FILE_NOT_EXISTS, "文件不存在或不属于voice分类");
        }
        // 2. Reject a duplicate voice-over name for the same user
        TikUserVoiceDO existingVoice = voiceMapper.selectOne(new LambdaQueryWrapperX<TikUserVoiceDO>()
                .eq(TikUserVoiceDO::getUserId, userId)
                .eq(TikUserVoiceDO::getName, createReqVO.getName())
                .eq(TikUserVoiceDO::getDeleted, false));
        if (existingVoice != null) {
            throw exception(VOICE_NAME_DUPLICATE);
        }
        // 3. Insert the record; language/gender fall back to "zh-CN"/"female"
        TikUserVoiceDO voice = new TikUserVoiceDO()
                .setUserId(userId)
                .setName(createReqVO.getName())
                .setFileId(createReqVO.getFileId())
                .setLanguage(StrUtil.blankToDefault(createReqVO.getLanguage(), "zh-CN"))
                .setGender(StrUtil.blankToDefault(createReqVO.getGender(), "female"))
                .setNote(createReqVO.getNote())
                .setTranscription(null); // null = not transcribed yet
        voiceMapper.insert(voice);
        // 4. When auto-transcribe is requested, start async recognition against a presigned file URL
        if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) {
            String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
            log.info("[createVoice][开启自动识别,配音编号({})文件ID({})预签名URL({})]",
                    voice.getId(), fileDO.getId(), fileAccessUrl);
            asyncTranscribeVoice(voice.getId(), fileAccessUrl);
        }
        log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId());
        return voice.getId();
    }
    @Override
    @Transactional(rollbackFor = Exception.class)
    public void updateVoice(AppTikUserVoiceUpdateReqVO updateReqVO) {
        Long userId = SecurityFrameworkUtils.getLoginUserId();
        // 1. The voice-over must exist and belong to the current user
        TikUserVoiceDO voice = voiceMapper.selectById(updateReqVO.getId());
        if (voice == null || !voice.getUserId().equals(userId)) {
            throw exception(VOICE_NOT_EXISTS);
        }
        // 2. When renaming, reject a name already used by another record of this user
        if (StrUtil.isNotBlank(updateReqVO.getName()) && !updateReqVO.getName().equals(voice.getName())) {
            TikUserVoiceDO existingVoice = voiceMapper.selectOne(new LambdaQueryWrapperX<TikUserVoiceDO>()
                    .eq(TikUserVoiceDO::getUserId, userId)
                    .eq(TikUserVoiceDO::getName, updateReqVO.getName())
                    .eq(TikUserVoiceDO::getDeleted, false)
                    .ne(TikUserVoiceDO::getId, updateReqVO.getId()));
            if (existingVoice != null) {
                throw exception(VOICE_NAME_DUPLICATE);
            }
        }
        // 3. Partial update: only non-blank strings / non-null note & transcription are written
        TikUserVoiceDO updateObj = new TikUserVoiceDO()
                .setId(updateReqVO.getId());
        if (StrUtil.isNotBlank(updateReqVO.getName())) {
            updateObj.setName(updateReqVO.getName());
        }
        if (StrUtil.isNotBlank(updateReqVO.getLanguage())) {
            updateObj.setLanguage(updateReqVO.getLanguage());
        }
        if (StrUtil.isNotBlank(updateReqVO.getGender())) {
            updateObj.setGender(updateReqVO.getGender());
        }
        if (updateReqVO.getNote() != null) {
            updateObj.setNote(updateReqVO.getNote());
        }
        if (updateReqVO.getTranscription() != null) {
            updateObj.setTranscription(updateReqVO.getTranscription());
        }
        voiceMapper.updateById(updateObj);
        log.info("[updateVoice][用户({})更新配音成功,配音编号({})]", userId, updateReqVO.getId());
    }
/**
 * Deletes a voice record owned by the current user together with its backing
 * audio file (user-file record + OSS object).
 */
@Override
@Transactional(rollbackFor = Exception.class)
public void deleteVoice(Long id) {
    Long userId = SecurityFrameworkUtils.getLoginUserId();
    // 1. The voice must exist and belong to the current user
    TikUserVoiceDO voice = voiceMapper.selectById(id);
    if (voice == null || !voice.getUserId().equals(userId)) {
        throw exception(VOICE_NOT_EXISTS);
    }
    // 2. Logically delete the voice record first: this DB write participates in the
    //    transaction, whereas the file deletion below also removes the OSS object,
    //    which cannot be rolled back. Doing the irreversible step last avoids losing
    //    the audio while the voice row still exists if the method fails midway.
    voiceMapper.deleteById(id);
    // 3. Delete the backing audio file (user-file record + OSS object), if any
    TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX<TikUserFileDO>()
            .eq(TikUserFileDO::getFileId, voice.getFileId())
            .eq(TikUserFileDO::getUserId, userId));
    if (userFile != null) {
        tikUserFileService.deleteFiles(Collections.singletonList(userFile.getId()));
    }
    log.info("[deleteVoice][用户({})删除配音成功,配音编号({})]", userId, id);
}
/**
 * Pages the current user's voices, attaching a short-lived presigned URL for
 * each voice's backing file. Files are batch-loaded to avoid N+1 queries.
 */
@Override
public PageResult<AppTikUserVoiceRespVO> getVoicePage(AppTikUserVoicePageReqVO pageReqVO) {
    // Always scope the query to the current login user, ignoring any client-sent userId
    Long userId = SecurityFrameworkUtils.getLoginUserId();
    pageReqVO.setUserId(userId);
    PageResult<TikUserVoiceDO> pageResult = voiceMapper.selectPage(pageReqVO);
    // Batch-load referenced files up front
    Map<Long, FileDO> fileMap = new HashMap<>();
    if (CollUtil.isNotEmpty(pageResult.getList())) {
        List<Long> fileIds = pageResult.getList().stream()
                .map(TikUserVoiceDO::getFileId)
                .filter(fileId -> fileId != null) // guard against rows without a file
                .distinct()
                .collect(Collectors.toList());
        if (CollUtil.isNotEmpty(fileIds)) {
            fileMap.putAll(fileMapper.selectBatchIds(fileIds).stream()
                    // keep-first merge function: toMap without one throws on duplicate keys
                    .collect(Collectors.toMap(FileDO::getId, file -> file, (first, second) -> first)));
        }
    }
    // Convert to VOs, attaching a presigned URL (1 hour) for each backing file
    return CollectionUtils.convertPage(pageResult, voice -> {
        AppTikUserVoiceRespVO vo = BeanUtils.toBean(voice, AppTikUserVoiceRespVO.class);
        FileDO fileDO = fileMap.get(voice.getFileId());
        if (fileDO != null) {
            vo.setFileUrl(fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS));
        }
        return vo;
    });
}
/**
 * Returns one voice owned by the current user, with a short-lived presigned
 * URL for its backing audio file.
 */
@Override
public AppTikUserVoiceRespVO getVoice(Long id) {
    Long userId = SecurityFrameworkUtils.getLoginUserId();
    // Only the owner may read the voice
    TikUserVoiceDO voice = voiceMapper.selectById(id);
    if (voice == null || !voice.getUserId().equals(userId)) {
        throw exception(VOICE_NOT_EXISTS);
    }
    AppTikUserVoiceRespVO respVO = BeanUtils.toBean(voice, AppTikUserVoiceRespVO.class);
    // Resolve the backing file and expose it through a presigned URL (1 hour)
    FileDO fileDO = fileMapper.selectById(voice.getFileId());
    if (fileDO != null) {
        respVO.setFileUrl(fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS));
    }
    return respVO;
}
/**
 * Triggers speech recognition for a voice owned by the current user.
 */
@Override
@Transactional(rollbackFor = Exception.class)
public void transcribeVoice(Long id) {
    Long userId = SecurityFrameworkUtils.getLoginUserId();
    // Only the owner may trigger transcription
    TikUserVoiceDO voice = voiceMapper.selectById(id);
    if (voice == null || !voice.getUserId().equals(userId)) {
        throw exception(VOICE_NOT_EXISTS);
    }
    FileDO fileDO = fileMapper.selectById(voice.getFileId());
    if (fileDO == null) {
        throw exception(VOICE_FILE_NOT_EXISTS);
    }
    // Hand the ASR service a presigned URL it can fetch.
    // NOTE(review): this is a same-class call, so Spring's @Async proxy on
    // asyncTranscribeVoice is bypassed and the work runs synchronously on this
    // thread — confirm whether that is intended.
    String presignedUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
    asyncTranscribeVoice(id, presignedUrl);
}
/**
 * Synthesizes speech for the request, persists the audio as a user file and
 * returns its playable URL.
 * <p>
 * Flow: build final text -> check synthesis cache -> on miss call CosyVoice ->
 * upload audio as a user file -> populate response -> write cache. Note that
 * even on a cache hit a NEW user file is created (see
 * {@link #buildSynthResponseFromCache}); only the CosyVoice call is skipped.
 */
@Override
public AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO) {
    // Combine transcription + manual input; no fallback text, so blank input throws
    String finalText = determineSynthesisText(
            reqVO.getTranscriptionText(),
            reqVO.getInputText(),
            false);
    finalText = appendEmotion(finalText, reqVO.getEmotion());
    // The cache key covers every parameter that influences the produced audio
    String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
            reqVO.getVoiceId(),
            reqVO.getFileUrl(),
            finalText,
            reqVO.getSpeechRate(),
            reqVO.getVolume(),
            reqVO.getEmotion(),
            reqVO.getAudioFormat(),
            reqVO.getSampleRate());
    SynthCacheEntry synthCache = getSynthCache(cacheKey);
    if (synthCache != null) {
        return buildSynthResponseFromCache(reqVO, synthCache);
    }
    CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
            finalText,
            reqVO.getVoiceId(),
            reqVO.getModel(),
            reqVO.getSpeechRate(),
            reqVO.getVolume(),
            reqVO.getSampleRate(),
            reqVO.getAudioFormat(),
            false
    ));
    // Prefer the format reported by CosyVoice, then the requested/configured one
    String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
    String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
    // Persist the audio bytes as a user file so the client gets a durable URL
    ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
            "file",
            buildFileName(voiceId, format),
            resolveContentType(format),
            ttsResult.getAudio()
    );
    Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
    AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
    respVO.setFileId(fileId);
    respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
    respVO.setFormat(format);
    respVO.setSampleRate(ttsResult.getSampleRate());
    respVO.setRequestId(ttsResult.getRequestId());
    respVO.setVoiceId(voiceId);
    // Cache the raw audio (base64) so identical requests can skip the TTS call.
    // NOTE(review): full audio payloads are stored in Redis — confirm entry sizes
    // stay within acceptable limits for the deployment.
    saveSynthCache(cacheKey, new SynthCacheEntry(
            Base64.getEncoder().encodeToString(ttsResult.getAudio()),
            format,
            ttsResult.getSampleRate(),
            ttsResult.getRequestId(),
            voiceId
    ));
    return respVO;
}
/**
 * Generates an audition ("试听") clip. Unlike {@link #synthesizeVoice}, the
 * produced audio is stored once under a shared preview path and only its OSS
 * location is cached, so repeated previews reuse the same object.
 */
@Override
public AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO) {
    // Previews may fall back to the configured sample sentence when no text is given
    String finalText = determineSynthesisText(
            reqVO.getTranscriptionText(),
            reqVO.getInputText(),
            true);
    finalText = appendEmotion(finalText, reqVO.getEmotion());
    // Sample rate is not client-controlled for previews, hence the trailing null
    String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX,
            reqVO.getVoiceId(),
            reqVO.getFileUrl(),
            finalText,
            reqVO.getSpeechRate(),
            reqVO.getVolume(),
            reqVO.getEmotion(),
            reqVO.getAudioFormat(),
            null);
    PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
    String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
    if (previewCache != null) {
        // Cache hit: only a fresh presigned URL is needed
        String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
        return buildPreviewResp(previewCache, cachedUrl, voiceId);
    }
    CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
            finalText,
            reqVO.getVoiceId(),
            reqVO.getModel(),
            reqVO.getSpeechRate(),
            reqVO.getVolume(),
            null,
            reqVO.getAudioFormat(),
            true
    ));
    String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
    // voiceId was already resolved above; the original redundantly recomputed it here
    String objectName = buildFileName(voiceId, format);
    String fileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
    String presignUrl = fileApi.presignGetUrl(fileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
    PreviewCacheEntry entry = new PreviewCacheEntry(fileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
    savePreviewCache(cacheKey, entry);
    return buildPreviewResp(entry, presignUrl, voiceId);
}
/**
 * Maps the individual synthesis parameters onto a CosyVoice client request.
 * Null values are passed through; the client applies its own defaults.
 */
private CosyVoiceTtsRequest buildTtsRequest(String text,
                                            String voiceId,
                                            String model,
                                            Float speechRate,
                                            Float volume,
                                            Integer sampleRate,
                                            String audioFormat,
                                            boolean preview) {
    var builder = CosyVoiceTtsRequest.builder();
    builder.text(text);
    builder.voiceId(voiceId);
    builder.model(model);
    builder.speechRate(speechRate);
    builder.volume(volume);
    builder.sampleRate(sampleRate);
    builder.audioFormat(audioFormat);
    builder.preview(preview);
    return builder.build();
}
/**
 * Picks the audio format by preference: server-reported format, then the
 * requested format, then the configured default.
 */
private String defaultFormat(String responseFormat, String requestFormat) {
    if (StrUtil.isNotBlank(responseFormat)) {
        return responseFormat;
    }
    if (StrUtil.isNotBlank(requestFormat)) {
        return requestFormat;
    }
    return cosyVoiceProperties.getAudioFormat();
}
/**
 * Builds an object name like "{voiceId}-{millis}.{format}", keeping only
 * characters that are safe in OSS object names.
 */
private String buildFileName(String voiceId, String format) {
    String safeVoice = StrUtil.blankToDefault(voiceId, "voice")
            .replaceAll("[^a-zA-Z0-9_-]", "");
    // A voiceId made up entirely of stripped characters (e.g. CJK) would otherwise
    // yield a name starting with a bare "-"
    if (safeVoice.isEmpty()) {
        safeVoice = "voice";
    }
    return safeVoice + "-" + System.currentTimeMillis() + "." + format;
}
/**
 * Resolves the MIME type for an audio format; unknown or null formats fall
 * back to "audio/mpeg".
 */
private String resolveContentType(String format) {
    if (format == null) {
        return "audio/mpeg";
    }
    return switch (format.toLowerCase()) {
        case "wav" -> "audio/wav";
        case "flac" -> "audio/flac";
        // "mp3" and anything unrecognized map to MPEG audio
        default -> "audio/mpeg";
    };
}
/**
 * Builds the text to synthesize from the recognized transcription and the
 * user's manual input, transcription first, joined by a newline.
 *
 * @param allowFallback when true (preview flow), blank input falls back to the
 *                      configured sample sentence instead of failing
 */
private String determineSynthesisText(String transcriptionText, String inputText, boolean allowFallback) {
    String first = StrUtil.isNotBlank(transcriptionText) ? transcriptionText.trim() : "";
    String second = StrUtil.isNotBlank(inputText) ? inputText.trim() : "";
    String combined = (!first.isEmpty() && !second.isEmpty())
            ? first + "\n" + second
            : first + second;
    if (!combined.isEmpty()) {
        return combined;
    }
    if (allowFallback) {
        return cosyVoiceProperties.getPreviewText();
    }
    throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
}
/**
 * Prefixes the text with an emotion tag for the TTS prompt. Blank text, blank
 * emotion, or "neutral" leave the text untouched.
 */
private String appendEmotion(String text, String emotion) {
    if (StrUtil.isBlank(text)) {
        return text;
    }
    if (StrUtil.isBlank(emotion) || "neutral".equalsIgnoreCase(emotion)) {
        return text;
    }
    // Map known English emotion codes to their Chinese prompt labels;
    // unknown codes are passed through verbatim
    String emotionLabel = switch (emotion.toLowerCase()) {
        case "happy" -> "高兴";
        case "angry" -> "愤怒";
        case "sad" -> "悲伤";
        case "scared" -> "害怕";
        case "disgusted" -> "厌恶";
        case "surprised" -> "惊讶";
        default -> emotion;
    };
    // NOTE(review): the empty "" below looks like a closing "】" lost in transit —
    // confirm the intended tag format is 【情感:...】
    return "【情感:" + emotionLabel + "" + text;
}
/**
 * Derives a Redis cache key from every parameter that influences the produced
 * audio. The payload is SHA-256 hashed so keys stay short and free of
 * characters that are awkward in Redis.
 */
private String buildCacheKey(String prefix,
                             String voiceId,
                             String fileUrl,
                             String text,
                             Float speechRate,
                             Float volume,
                             String emotion,
                             String audioFormat,
                             Integer sampleRate) {
    // Identify the voice by its id, or by the source audio URL for clone requests
    String identifier;
    if (StrUtil.isNotBlank(voiceId)) {
        identifier = voiceId;
    } else {
        identifier = StrUtil.blankToDefault(fileUrl, "no-voice");
    }
    // Null parameters are normalized to their effective defaults
    Object rate = speechRate != null ? speechRate : "1.0";
    Object vol = volume != null ? volume : "0";
    Object sample = sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate();
    String payload = StrUtil.join("|",
            identifier,
            text,
            rate,
            vol,
            StrUtil.blankToDefault(emotion, "neutral"),
            StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
            sample);
    return prefix + cn.hutool.crypto.SecureUtil.sha256(payload);
}
/**
 * Reads a preview cache entry; misses and Redis/parse errors are both treated
 * as "not cached".
 */
private PreviewCacheEntry getPreviewCache(String key) {
    try {
        String json = stringRedisTemplate.opsForValue().get(key);
        return StrUtil.isBlank(json) ? null : JSONUtil.toBean(json, PreviewCacheEntry.class);
    } catch (Exception ex) {
        log.warn("[previewVoice][cache read failed][key={}]", key, ex);
        return null;
    }
}
/**
 * Writes a preview cache entry with a TTL. Best effort: a failed cache write
 * must never break the user-facing call.
 */
private void savePreviewCache(String key, PreviewCacheEntry entry) {
    try {
        String json = JSONUtil.toJsonStr(entry);
        stringRedisTemplate.opsForValue()
                .set(key, json, PREVIEW_CACHE_TTL_SECONDS, TimeUnit.SECONDS);
    } catch (Exception ex) {
        log.warn("[previewVoice][cache write failed][key={}]", key, ex);
    }
}
/**
 * Reads a synthesis cache entry; misses and Redis/parse errors are both
 * treated as "not cached".
 */
private SynthCacheEntry getSynthCache(String key) {
    try {
        String json = stringRedisTemplate.opsForValue().get(key);
        return StrUtil.isBlank(json) ? null : JSONUtil.toBean(json, SynthCacheEntry.class);
    } catch (Exception ex) {
        log.warn("[synthesizeVoice][cache read failed][key={}]", key, ex);
        return null;
    }
}
/**
 * Writes a synthesis cache entry with a TTL. Best effort: a failed cache write
 * must never break the user-facing call.
 */
private void saveSynthCache(String key, SynthCacheEntry entry) {
    try {
        String json = JSONUtil.toJsonStr(entry);
        stringRedisTemplate.opsForValue()
                .set(key, json, SYNTH_CACHE_TTL_SECONDS, TimeUnit.SECONDS);
    } catch (Exception ex) {
        log.warn("[synthesizeVoice][cache write failed][key={}]", key, ex);
    }
}
/**
 * Builds a synthesis response from a cache hit. The cached audio is decoded
 * and uploaded as a NEW user file, so each request still hands back a file
 * owned by the caller; only the CosyVoice call is skipped.
 */
private AppTikVoiceTtsRespVO buildSynthResponseFromCache(AppTikVoiceTtsReqVO reqVO, SynthCacheEntry cache) {
    byte[] audioBytes = Base64.getDecoder().decode(cache.getAudioBase64());
    String format = defaultFormat(cache.getFormat(), reqVO.getAudioFormat());
    String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cache.getVoiceId());
    Long fileId = tikUserFileService.uploadFile(
            new ByteArrayMultipartFile(
                    "file",
                    buildFileName(voiceId, format),
                    resolveContentType(format),
                    audioBytes),
            "audio",
            null);
    AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
    respVO.setFileId(fileId);
    respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
    respVO.setFormat(format);
    respVO.setSampleRate(cache.getSampleRate());
    respVO.setRequestId(cache.getRequestId());
    respVO.setVoiceId(voiceId);
    return respVO;
}
/**
 * Assembles the preview payload from a cache entry plus a freshly signed URL.
 */
private AppTikVoicePreviewRespVO buildPreviewResp(PreviewCacheEntry entry, String presignUrl, String voiceId) {
    AppTikVoicePreviewRespVO resp = new AppTikVoicePreviewRespVO();
    resp.setVoiceId(voiceId);
    resp.setAudioUrl(presignUrl);
    resp.setFormat(entry.getFormat());
    resp.setSampleRate(entry.getSampleRate());
    resp.setRequestId(entry.getRequestId());
    return resp;
}
/**
 * Redis cache payload for preview results: only the OSS location of the
 * rendered audio is cached. Serialized via hutool {@code JSONUtil}; the
 * no-arg constructor and the setters below are required so deserialization
 * can repopulate the bean regardless of whether field access is supported.
 */
private static class PreviewCacheEntry {

    private String fileUrl;     // permanent OSS path of the preview audio
    private String format;      // audio container format, e.g. "wav"
    private Integer sampleRate; // sample rate reported by CosyVoice
    private String requestId;   // upstream CosyVoice request id

    public PreviewCacheEntry() {}

    public PreviewCacheEntry(String fileUrl, String format, Integer sampleRate, String requestId) {
        this.fileUrl = fileUrl;
        this.format = format;
        this.sampleRate = sampleRate;
        this.requestId = requestId;
    }

    public String getFileUrl() {
        return fileUrl;
    }

    public void setFileUrl(String fileUrl) {
        this.fileUrl = fileUrl;
    }

    public String getFormat() {
        return format;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    public Integer getSampleRate() {
        return sampleRate;
    }

    public void setSampleRate(Integer sampleRate) {
        this.sampleRate = sampleRate;
    }

    public String getRequestId() {
        return requestId;
    }

    public void setRequestId(String requestId) {
        this.requestId = requestId;
    }
}
/**
 * Redis cache payload for full synthesis results: the audio itself is kept
 * base64-encoded so a cache hit can skip the CosyVoice call entirely.
 * Serialized via hutool {@code JSONUtil}; the no-arg constructor and the
 * setters below are required so deserialization can repopulate the bean
 * regardless of whether field access is supported.
 */
private static class SynthCacheEntry {

    private String audioBase64; // base64-encoded audio bytes
    private String format;      // audio container format, e.g. "wav"
    private Integer sampleRate; // sample rate reported by CosyVoice
    private String requestId;   // upstream CosyVoice request id
    private String voiceId;     // voice the audio was rendered with

    public SynthCacheEntry() {}

    public SynthCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId, String voiceId) {
        this.audioBase64 = audioBase64;
        this.format = format;
        this.sampleRate = sampleRate;
        this.requestId = requestId;
        this.voiceId = voiceId;
    }

    public String getAudioBase64() {
        return audioBase64;
    }

    public void setAudioBase64(String audioBase64) {
        this.audioBase64 = audioBase64;
    }

    public String getFormat() {
        return format;
    }

    public void setFormat(String format) {
        this.format = format;
    }

    public Integer getSampleRate() {
        return sampleRate;
    }

    public void setSampleRate(Integer sampleRate) {
        this.sampleRate = sampleRate;
    }

    public String getRequestId() {
        return requestId;
    }

    public void setRequestId(String requestId) {
        this.requestId = requestId;
    }

    public String getVoiceId() {
        return voiceId;
    }

    public void setVoiceId(String voiceId) {
        this.voiceId = voiceId;
    }
}
/**
 * Runs speech recognition for a voice and stores the recognized text on the
 * record. All failures are logged and swallowed so the caller is unaffected.
 * <p>
 * NOTE(review): this method is invoked from within the same class
 * (createVoice / transcribeVoice), so Spring's {@code @Async} proxy is
 * bypassed and those calls run synchronously on the caller's thread —
 * confirm whether it should live in a separate bean or be called via the
 * proxy to actually run asynchronously.
 *
 * @param voiceId voice record id to update with the transcription
 * @param fileUrl presigned URL of the audio file handed to the ASR service
 */
@Async
public void asyncTranscribeVoice(Long voiceId, String fileUrl) {
    try {
        log.info("[asyncTranscribeVoice][开始识别,配音编号({})文件URL({})]", voiceId, fileUrl);
        // videoToCharacters2 accepts a batch of URLs; a single file is submitted here
        Object result = tikHupService.videoToCharacters2(Collections.singletonList(fileUrl));
        // Parse the recognition result into plain text
        String transcription = extractTranscription(result);
        if (StrUtil.isNotBlank(transcription)) {
            // Persist only the transcription field of the voice record
            TikUserVoiceDO updateObj = new TikUserVoiceDO()
                    .setId(voiceId)
                    .setTranscription(transcription);
            voiceMapper.updateById(updateObj);
            log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length());
        } else {
            log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]",
                    voiceId, result instanceof CommonResult ? ((CommonResult<?>) result).getCode() : "未知");
        }
    } catch (Exception e) {
        // Best effort: recognition failures must not surface to the user flow
        log.error("[asyncTranscribeVoice][识别失败,配音编号({})文件URL({})]", voiceId, fileUrl, e);
    }
}
/**
 * Extracts plain transcription text from the ASR call's return value.
 * A failed {@code CommonResult} or any parse error yields {@code null};
 * otherwise the parsed text is returned, falling back to the payload's
 * {@code toString()} when no text could be parsed.
 *
 * @param result raw return value of {@code TikHupService.videoToCharacters*}
 * @return recognized text, or null when unavailable
 */
private String extractTranscription(Object result) {
    if (result == null) {
        return null;
    }
    try {
        Object payload = result;
        if (result instanceof CommonResult<?> commonResult) {
            // A failed wrapper carries no usable transcription
            if (!commonResult.isSuccess()) {
                log.warn("[extractTranscription][识别失败code({})msg({})]",
                        commonResult.getCode(), commonResult.getMsg());
                return null;
            }
            payload = commonResult.getData();
            if (payload == null) {
                return null;
            }
        }
        String parsed = parseTranscriptionText(payload);
        return StrUtil.isNotBlank(parsed) ? parsed : payload.toString();
    } catch (Exception e) {
        log.warn("[extractTranscription][解析识别结果失败]", e);
        return null;
    }
}
/** JSON field names known to carry transcription text fragments (truly immutable). */
private static final List<String> TRANSCRIPTION_TEXT_KEYS =
        List.of("text", "sentence", "result", "content", "transcript", "output_text", "display_text");
/**
 * Best-effort extraction of readable transcription text from an ASR payload.
 * Non-JSON input is returned verbatim. For JSON, text-bearing keys are
 * collected first; failing that, a Paraformer-style "results" array is
 * followed to its remote "transcription_url" and the referenced transcript
 * is downloaded. On parse failure the raw string is returned unchanged.
 * <p>
 * NOTE(review): dead ends inside the "results" branch return {@code null},
 * while a non-JSONObject root or a parse exception returns the raw string —
 * confirm this asymmetry is intentional.
 */
private String parseTranscriptionText(Object rawData) {
    if (rawData == null) {
        return null;
    }
    // Normalize the payload to a string so both String and bean inputs are handled
    String rawString = rawData instanceof String ? (String) rawData : JSONUtil.toJsonStr(rawData);
    if (StrUtil.isBlank(rawString)) {
        return null;
    }
    // Plain (non-JSON) text is already the transcription
    if (!JSONUtil.isTypeJSON(rawString)) {
        return rawString;
    }
    try {
        Object json = JSONUtil.parse(rawString);
        // First attempt: harvest known text-bearing keys anywhere in the tree
        String localText = extractTextFromJson(json);
        if (StrUtil.isNotBlank(localText)) {
            return localText;
        }
        if (json instanceof JSONObject jsonObject) {
            // Paraformer-style payload: the last element of "results" points at a
            // downloadable transcript
            JSONArray results = jsonObject.getJSONArray("results");
            if (CollUtil.isEmpty(results)) {
                return null;
            }
            Object lastObj = results.get(results.size() - 1);
            if (!(lastObj instanceof JSONObject lastResult)) {
                return null;
            }
            String transcriptionUrl = lastResult.getStr("transcription_url");
            if (StrUtil.isBlank(transcriptionUrl)) {
                return null;
            }
            StringBuilder builder = new StringBuilder();
            appendRemoteTranscription(builder, transcriptionUrl);
            return builder.length() > 0 ? builder.toString().trim() : null;
        }
    } catch (Exception e) {
        log.warn("[parseTranscriptionText][解析Paraformer结果失败]", e);
    }
    // Fallback: hand back the raw payload rather than losing it
    return rawString;
}
/**
 * Downloads a Paraformer-style remote transcript and appends any extracted
 * text to the builder. Blank URLs, empty downloads and text-less payloads are
 * all silently ignored.
 */
private void appendRemoteTranscription(StringBuilder builder, String transcriptionUrl) {
    if (StrUtil.isBlank(transcriptionUrl)) {
        return;
    }
    String body = fetchRemoteTranscription(transcriptionUrl);
    if (StrUtil.isBlank(body)) {
        return;
    }
    String text = extractTextFromJson(JSONUtil.parse(body));
    if (StrUtil.isNotBlank(text)) {
        appendLine(builder, text);
    }
}
/**
 * Collects all text fragments from a parsed JSON tree; returns null when the
 * tree is null or yields no text.
 */
private String extractTextFromJson(Object json) {
    if (json == null) {
        return null;
    }
    StringBuilder collected = new StringBuilder();
    collectTranscriptionText(json, collected);
    if (collected.length() == 0) {
        return null;
    }
    return collected.toString().trim();
}
/**
 * Downloads the remote transcript body. Best effort: failures are logged and
 * reported as "no content" (null), as is an empty body.
 */
private String fetchRemoteTranscription(String url) {
    try {
        String body = HttpUtil.get(url);
        return StrUtil.isNotBlank(body) ? body : null;
    } catch (Exception e) {
        log.warn("[fetchRemoteTranscription][下载转写文本失败url({})]", url, e);
        return null;
    }
}
/**
 * Depth-first walk over a parsed JSON tree, harvesting string values stored
 * under the known text-bearing keys into the builder.
 */
private void collectTranscriptionText(Object node, StringBuilder builder) {
    if (node instanceof JSONObject jsonObject) {
        jsonObject.forEach((key, value) -> {
            if (value instanceof CharSequence && TRANSCRIPTION_TEXT_KEYS.contains(key)) {
                appendLine(builder, value.toString());
            } else if (value instanceof JSONObject || value instanceof JSONArray) {
                collectTranscriptionText(value, builder);
            }
        });
    } else if (node instanceof JSONArray jsonArray) {
        jsonArray.forEach(item -> collectTranscriptionText(item, builder));
    }
}
/**
 * Appends a trimmed, non-blank fragment to the builder, separating fragments
 * with a newline.
 */
private void appendLine(StringBuilder builder, String line) {
    String trimmed = StrUtil.trim(line);
    if (StrUtil.isNotBlank(trimmed)) {
        if (builder.length() > 0) {
            builder.append('\n');
        }
        builder.append(trimmed);
    }
}
}

View File

@@ -0,0 +1,69 @@
package cn.iocoder.yudao.module.tik.voice.util;
import org.springframework.util.FileCopyUtils;
import org.springframework.web.multipart.MultipartFile;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
/**
 * In-memory {@link MultipartFile} used for service-internal uploads (e.g.
 * persisting TTS output) without going through an HTTP request.
 */
public class ByteArrayMultipartFile implements MultipartFile {

    private final String name;
    private final String originalFilename;
    private final String contentType;
    private final byte[] content;

    /**
     * @param name             form field name
     * @param originalFilename file name reported to the storage layer
     * @param contentType      MIME type, may be null
     * @param content          file bytes; null is treated as empty
     */
    public ByteArrayMultipartFile(String name, String originalFilename, String contentType, byte[] content) {
        this.name = name;
        this.originalFilename = originalFilename;
        this.contentType = contentType;
        // Defensive copy: the caller must not be able to mutate the stored bytes later
        this.content = content != null ? content.clone() : new byte[0];
    }

    @Override
    public String getName() {
        return name;
    }

    @Override
    public String getOriginalFilename() {
        return originalFilename;
    }

    @Override
    public String getContentType() {
        return contentType;
    }

    @Override
    public boolean isEmpty() {
        return content.length == 0;
    }

    @Override
    public long getSize() {
        return content.length;
    }

    @Override
    public byte[] getBytes() {
        // Return a copy so the internal buffer stays effectively immutable
        return content.clone();
    }

    @Override
    public InputStream getInputStream() {
        return new ByteArrayInputStream(content);
    }

    @Override
    public void transferTo(File dest) throws IOException {
        FileCopyUtils.copy(content, dest);
    }
}

View File

@@ -0,0 +1,37 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.Max;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.Size;
import lombok.Data;
/**
 * Latentsync lip-sync task submission request VO.
 */
@Schema(description = "Latentsync 提交请求") // class-level @Schema for consistency with sibling VOs
@Data
public class AppTikLatentsyncSubmitReqVO {

    @Schema(description = "音频 URL需公网可访问", requiredMode = Schema.RequiredMode.REQUIRED,
            example = "https://example.com/audio.wav")
    @NotBlank(message = "音频地址不能为空")
    @Size(max = 1024, message = "音频地址长度不能超过 1024 字符")
    private String audioUrl;

    @Schema(description = "视频 URL需公网可访问", requiredMode = Schema.RequiredMode.REQUIRED,
            example = "https://example.com/video.mp4")
    @NotBlank(message = "视频地址不能为空")
    @Size(max = 1024, message = "视频地址长度不能超过 1024 字符")
    private String videoUrl;

    @Schema(description = "guidance_scale范围 1-2默认 1", example = "1")
    @Min(value = 1, message = "guidanceScale 不能小于 1")
    @Max(value = 2, message = "guidanceScale 不能大于 2")
    private Integer guidanceScale;

    @Schema(description = "随机种子(默认 8888", example = "8888")
    private Integer seed;
}

View File

@@ -0,0 +1,22 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
/**
 * Latentsync lip-sync task submission response VO.
 */
@Schema(description = "Latentsync 提交响应") // class-level @Schema for consistency with sibling VOs
@Data
public class AppTikLatentsyncSubmitRespVO {

    @Schema(description = "Latentsync 任务 ID", example = "8eed0b9b-6103-4357-a57b-9f135a8c3276")
    private String requestId;

    @Schema(description = "官方状态,如 IN_QUEUE、PROCESSING、SUCCEEDED", example = "IN_QUEUE")
    private String status;

    @Schema(description = "当前排队位置", example = "0")
    private Integer queuePosition;
}

View File

@@ -0,0 +1,38 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
/**
 * Member App - create voice request VO.
 *
 * @author 芋道源码
 */
@Schema(description = "用户 App - 创建配音 Request VO")
@Data
public class AppTikUserVoiceCreateReqVO {

    @Schema(description = "配音名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "我的配音")
    @NotBlank(message = "配音名称不能为空")
    private String name;

    // Must reference an already-uploaded file in the "voice" category
    @Schema(description = "音频文件编号(关联 infra_file.id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
    @NotNull(message = "音频文件编号不能为空")
    private Long fileId;

    // When true, the service kicks off speech recognition right after creation
    @Schema(description = "是否自动识别", example = "false")
    private Boolean autoTranscribe;

    @Schema(description = "语言zh-CN-简体中文zh-TW-繁體中文en-US-English", example = "zh-CN")
    private String language;

    @Schema(description = "音色类型female-女声male-男声", example = "female")
    private String gender;

    @Schema(description = "备注", example = "这是一个测试配音")
    private String note;
}

View File

@@ -0,0 +1,23 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import cn.iocoder.yudao.framework.common.pojo.PageParam;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
/**
* 用户 App - 用户配音分页 Request VO
*
* @author 芋道源码
*/
@Schema(description = "用户 App - 用户配音分页 Request VO")
@Data
public class AppTikUserVoicePageReqVO extends PageParam {
@Schema(description = "用户编号(自动填充,无需传递)")
private Long userId;
@Schema(description = "配音名称(模糊查询)", example = "我的配音")
private String name;
}

View File

@@ -0,0 +1,48 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import java.time.LocalDateTime;
/**
 * Member App - voice response VO.
 *
 * @author 芋道源码
 */
@Schema(description = "用户 App - 用户配音 Response VO")
@Data
public class AppTikUserVoiceRespVO {

    @Schema(description = "配音编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
    private Long id;

    @Schema(description = "配音名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "我的配音")
    private String name;

    @Schema(description = "音频文件编号(关联 infra_file.id", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
    private Long fileId;

    // Populated with a short-lived presigned URL; not stored on the entity
    @Schema(description = "文件访问URL通过 file_id 关联查询获取)")
    private String fileUrl;

    // Null until recognition has run (or when it produced no text)
    @Schema(description = "语音识别内容", example = "这是识别出的文字内容")
    private String transcription;

    @Schema(description = "语言zh-CN-简体中文zh-TW-繁體中文en-US-English", example = "zh-CN")
    private String language;

    @Schema(description = "音色类型female-女声male-男声", example = "female")
    private String gender;

    @Schema(description = "备注", example = "这是一个测试配音")
    private String note;

    @Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
    private LocalDateTime createTime;

    @Schema(description = "更新时间", requiredMode = Schema.RequiredMode.REQUIRED)
    private LocalDateTime updateTime;
}

View File

@@ -0,0 +1,36 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
/**
 * Member App - update voice request VO. All fields except {@code id} are
 * optional; the service applies only the ones that are provided.
 *
 * @author 芋道源码
 */
@Schema(description = "用户 App - 更新配音 Request VO")
@Data
public class AppTikUserVoiceUpdateReqVO {

    @Schema(description = "配音编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
    @NotNull(message = "配音编号不能为空")
    private Long id;

    @Schema(description = "配音名称", example = "我的配音")
    private String name;

    @Schema(description = "语言zh-CN-简体中文zh-TW-繁體中文en-US-English", example = "zh-CN")
    private String language;

    @Schema(description = "音色类型female-女声male-男声", example = "female")
    private String gender;

    @Schema(description = "备注", example = "这是一个测试配音")
    private String note;

    // Allows manual correction of the automatically recognized text
    @Schema(description = "识别内容", example = "识别文字,可手动编辑")
    private String transcription;
}

View File

@@ -0,0 +1,43 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.Size;
import lombok.Data;
/**
 * Voice audition ("试听") request VO. When both text fields are blank the
 * service falls back to a configured sample sentence.
 */
@Schema(description = "音色试听请求") // class-level @Schema for consistency with the response VO
@Data
public class AppTikVoicePreviewReqVO {

    @Schema(description = "输入文本")
    @Size(max = 4000, message = "输入文本不能超过 4000 个字符")
    private String inputText;

    @Schema(description = "识别文本,用于拼接")
    @Size(max = 4000, message = "识别文本不能超过 4000 个字符")
    private String transcriptionText;

    @Schema(description = "音色 IDCosyVoice voiceId")
    private String voiceId;

    @Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
    private String fileUrl;

    @Schema(description = "模型名称,默认 cosyvoice-v2")
    private String model;

    @Schema(description = "语速", example = "1.0")
    private Float speechRate;

    @Schema(description = "音量", example = "0")
    private Float volume;

    @Schema(description = "情感", example = "neutral")
    private String emotion;

    @Schema(description = "音频格式,默认 wav")
    private String audioFormat;
}

View File

@@ -0,0 +1,26 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
/**
 * Voice audition ("试听") response VO.
 */
@Data
@Schema(description = "音色试听响应")
public class AppTikVoicePreviewRespVO {

    // Short-lived presigned URL; expires and must not be persisted by clients
    @Schema(description = "音频播放地址(预签名 URL")
    private String audioUrl;

    @Schema(description = "音频格式", example = "wav")
    private String format;

    @Schema(description = "采样率", example = "24000")
    private Integer sampleRate;

    @Schema(description = "CosyVoice 请求ID")
    private String requestId;

    @Schema(description = "使用的音色 ID")
    private String voiceId;
}

View File

@@ -0,0 +1,46 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.Size;
import lombok.Data;
/**
 * Text-to-speech synthesis request VO. At least one of {@code inputText} /
 * {@code transcriptionText} must be non-blank; the service concatenates them.
 */
@Schema(description = "CosyVoice 文本转语音请求") // class-level @Schema for consistency with the response VO
@Data
public class AppTikVoiceTtsReqVO {

    @Schema(description = "输入文本")
    @Size(max = 4000, message = "输入文本不能超过 4000 个字符")
    private String inputText;

    @Schema(description = "识别文本,用于拼接")
    @Size(max = 4000, message = "识别文本不能超过 4000 个字符")
    private String transcriptionText;

    @Schema(description = "音色 IDCosyVoice voiceId", example = "cosyvoice-v2-myvoice-xxx")
    private String voiceId;

    @Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
    private String fileUrl;

    @Schema(description = "模型名称,默认 cosyvoice-v2", example = "cosyvoice-v3")
    private String model;

    @Schema(description = "语速,默认 1.0", example = "1.0")
    private Float speechRate;

    @Schema(description = "情感", example = "happy")
    private String emotion;

    @Schema(description = "音量调节范围 [-10,10]", example = "0")
    private Float volume;

    @Schema(description = "目标采样率,默认 24000")
    private Integer sampleRate;

    @Schema(description = "音频格式,默认 wav可选 mp3")
    private String audioFormat;
}

View File

@@ -0,0 +1,29 @@
package cn.iocoder.yudao.module.tik.voice.vo;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
/**
 * Text-to-speech synthesis response VO.
 */
@Data
@Schema(description = "CosyVoice 文本转语音响应")
public class AppTikVoiceTtsRespVO {

    // Id of the user-file record created for the synthesized audio
    @Schema(description = "用户文件编号", example = "1024")
    private Long fileId;

    // Short-lived presigned URL; expires and must not be persisted by clients
    @Schema(description = "音频播放地址(预签名 URL")
    private String audioUrl;

    @Schema(description = "音频格式", example = "mp3")
    private String format;

    @Schema(description = "采样率", example = "24000")
    private Integer sampleRate;

    @Schema(description = "CosyVoice 请求ID")
    private String requestId;

    @Schema(description = "使用的音色 ID")
    private String voiceId;
}