send-stream
This commit is contained in:
@@ -0,0 +1,178 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.framework.common.exception.ServiceException;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.OkHttpClient;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Duration;
|
||||
import java.util.Base64;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
|
||||
import static cn.iocoder.yudao.module.tik.enmus.ErrorCodeConstants.VOICE_TTS_FAILED;
|
||||
|
||||
/**
|
||||
* CosyVoice 客户端
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class CosyVoiceClient {
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private final CosyVoiceProperties properties;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
private volatile OkHttpClient httpClient;
|
||||
|
||||
/**
|
||||
* 调用 CosyVoice TTS 接口
|
||||
*/
|
||||
public CosyVoiceTtsResult synthesize(CosyVoiceTtsRequest request) {
|
||||
if (!properties.isEnabled()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "未配置 CosyVoice API Key");
|
||||
}
|
||||
if (request == null || StrUtil.isBlank(request.getText())) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "TTS 文本不能为空");
|
||||
}
|
||||
|
||||
try {
|
||||
String payload = objectMapper.writeValueAsString(buildPayload(request));
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(properties.getTtsUrl())
|
||||
.addHeader("Authorization", "Bearer " + properties.getApiKey())
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.post(RequestBody.create(payload.getBytes(StandardCharsets.UTF_8), JSON))
|
||||
.build();
|
||||
|
||||
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
|
||||
String body = response.body() != null ? response.body().string() : "";
|
||||
if (!response.isSuccessful()) {
|
||||
log.error("[CosyVoice][TTS失败][status={}, body={}]", response.code(), body);
|
||||
throw buildException(body);
|
||||
}
|
||||
return parseTtsResult(body, request);
|
||||
}
|
||||
} catch (ServiceException ex) {
|
||||
throw ex;
|
||||
} catch (Exception ex) {
|
||||
log.error("[CosyVoice][TTS异常]", ex);
|
||||
throw exception(VOICE_TTS_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> buildPayload(CosyVoiceTtsRequest request) {
|
||||
Map<String, Object> payload = new HashMap<>();
|
||||
String model = StrUtil.blankToDefault(request.getModel(), properties.getDefaultModel());
|
||||
payload.put("model", model);
|
||||
|
||||
Map<String, Object> input = new HashMap<>();
|
||||
input.put("text", request.getText());
|
||||
String voiceId = StrUtil.blankToDefault(request.getVoiceId(), properties.getDefaultVoiceId());
|
||||
if (StrUtil.isNotBlank(voiceId)) {
|
||||
input.put("voice", voiceId);
|
||||
}
|
||||
payload.put("input", input);
|
||||
|
||||
Map<String, Object> parameters = new HashMap<>();
|
||||
int sampleRate = request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate();
|
||||
parameters.put("sample_rate", sampleRate);
|
||||
String format = StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat());
|
||||
parameters.put("format", format);
|
||||
if (request.getSpeechRate() != null) {
|
||||
parameters.put("speech_rate", request.getSpeechRate());
|
||||
}
|
||||
if (request.getVolume() != null) {
|
||||
parameters.put("volume", request.getVolume());
|
||||
}
|
||||
if (request.isPreview()) {
|
||||
parameters.put("preview", true);
|
||||
}
|
||||
payload.put("parameters", parameters);
|
||||
return payload;
|
||||
}
|
||||
|
||||
private CosyVoiceTtsResult parseTtsResult(String body, CosyVoiceTtsRequest request) throws Exception {
|
||||
JsonNode root = objectMapper.readTree(body);
|
||||
|
||||
// 错误响应包含 code 字段
|
||||
if (root.has("code")) {
|
||||
String message = root.has("message") ? root.get("message").asText() : body;
|
||||
log.error("[CosyVoice][TTS失败][code={}, message={}]", root.get("code").asText(), message);
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), message);
|
||||
}
|
||||
|
||||
JsonNode audioNode = root.path("output").path("audio");
|
||||
if (!audioNode.isArray() || audioNode.isEmpty()) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回的音频为空");
|
||||
}
|
||||
|
||||
JsonNode firstAudio = audioNode.get(0);
|
||||
String content = firstAudio.path("content").asText();
|
||||
if (StrUtil.isBlank(content)) {
|
||||
throw exception0(VOICE_TTS_FAILED.getCode(), "CosyVoice 返回空音频内容");
|
||||
}
|
||||
|
||||
byte[] audioBytes = Base64.getDecoder().decode(content);
|
||||
CosyVoiceTtsResult result = new CosyVoiceTtsResult();
|
||||
result.setAudio(audioBytes);
|
||||
result.setFormat(firstAudio.path("format").asText(StrUtil.blankToDefault(request.getAudioFormat(), properties.getAudioFormat())));
|
||||
result.setSampleRate(firstAudio.path("sample_rate").asInt(request.getSampleRate() != null ? request.getSampleRate() : properties.getSampleRate()));
|
||||
result.setRequestId(root.path("request_id").asText());
|
||||
result.setVoiceId(firstAudio.path("voice").asText(request.getVoiceId()));
|
||||
return result;
|
||||
}
|
||||
|
||||
private OkHttpClient getHttpClient() {
|
||||
if (httpClient == null) {
|
||||
synchronized (this) {
|
||||
if (httpClient == null) {
|
||||
java.time.Duration connect = defaultDuration(properties.getConnectTimeout(), 10);
|
||||
java.time.Duration read = defaultDuration(properties.getReadTimeout(), 60);
|
||||
httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.readTimeout(read.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
}
|
||||
return httpClient;
|
||||
}
|
||||
|
||||
private Duration defaultDuration(Duration duration, long seconds) {
|
||||
return duration == null ? Duration.ofSeconds(seconds) : duration;
|
||||
}
|
||||
|
||||
private ServiceException buildException(String body) {
|
||||
try {
|
||||
JsonNode root = objectMapper.readTree(body);
|
||||
String message = CollUtil.getFirst(
|
||||
CollUtil.newArrayList(
|
||||
root.path("message").asText(null),
|
||||
root.path("output").path("message").asText(null)));
|
||||
return exception0(VOICE_TTS_FAILED.getCode(), StrUtil.blankToDefault(message, "CosyVoice 调用失败"));
|
||||
} catch (Exception ignored) {
|
||||
return exception0(VOICE_TTS_FAILED.getCode(), body);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.framework.common.exception.ServiceException;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitResponse;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.LatentsyncProperties;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.OkHttpClient;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.Duration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception0;
|
||||
import static cn.iocoder.yudao.module.tik.enmus.ErrorCodeConstants.LATENTSYNC_SUBMIT_FAILED;
|
||||
|
||||
/**
|
||||
* 302AI Latentsync 客户端
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class LatentsyncClient {
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
private final LatentsyncProperties properties;
|
||||
private final ObjectMapper objectMapper;
|
||||
|
||||
private volatile OkHttpClient httpClient;
|
||||
|
||||
public LatentsyncSubmitResponse submitTask(LatentsyncSubmitRequest request) {
|
||||
if (!properties.isEnabled()) {
|
||||
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "未配置 Latentsync API Key");
|
||||
}
|
||||
validateRequest(request);
|
||||
|
||||
Map<String, Object> payload = buildPayload(request);
|
||||
try {
|
||||
String body = objectMapper.writeValueAsString(payload);
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(properties.getSubmitUrl())
|
||||
.addHeader("Authorization", "Bearer " + properties.getApiKey())
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.post(RequestBody.create(body.getBytes(StandardCharsets.UTF_8), JSON))
|
||||
.build();
|
||||
|
||||
try (Response response = getHttpClient().newCall(httpRequest).execute()) {
|
||||
String responseBody = response.body() != null ? response.body().string() : "";
|
||||
if (!response.isSuccessful()) {
|
||||
log.error("[Latentsync][submit failed][status={}, body={}]", response.code(), responseBody);
|
||||
throw buildException(responseBody);
|
||||
}
|
||||
LatentsyncSubmitResponse submitResponse =
|
||||
objectMapper.readValue(responseBody, LatentsyncSubmitResponse.class);
|
||||
if (StrUtil.isBlank(submitResponse.getRequestId())) {
|
||||
log.error("[Latentsync][submit failed][response={}]", responseBody);
|
||||
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "Latentsync 返回 requestId 为空");
|
||||
}
|
||||
return submitResponse;
|
||||
}
|
||||
} catch (ServiceException ex) {
|
||||
throw ex;
|
||||
} catch (Exception ex) {
|
||||
log.error("[Latentsync][submit exception]", ex);
|
||||
throw exception(LATENTSYNC_SUBMIT_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
private void validateRequest(LatentsyncSubmitRequest request) {
|
||||
if (request == null) {
|
||||
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "请求体不能为空");
|
||||
}
|
||||
if (StrUtil.isBlank(request.getAudioUrl())) {
|
||||
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "音频地址不能为空");
|
||||
}
|
||||
if (StrUtil.isBlank(request.getVideoUrl())) {
|
||||
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "视频地址不能为空");
|
||||
}
|
||||
Integer scale = request.getGuidanceScale();
|
||||
if (scale != null && (scale < 1 || scale > 2)) {
|
||||
throw exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), "guidanceScale 取值范围 1-2");
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> buildPayload(LatentsyncSubmitRequest request) {
|
||||
Map<String, Object> payload = new HashMap<>();
|
||||
payload.put("audio_url", request.getAudioUrl());
|
||||
payload.put("video_url", request.getVideoUrl());
|
||||
Integer scale = request.getGuidanceScale() != null
|
||||
? request.getGuidanceScale() : properties.getDefaultGuidanceScale();
|
||||
payload.put("guidance_scale", scale);
|
||||
Integer seed = request.getSeed() != null ? request.getSeed() : properties.getDefaultSeed();
|
||||
payload.put("seed", seed);
|
||||
return payload;
|
||||
}
|
||||
|
||||
private OkHttpClient getHttpClient() {
|
||||
if (httpClient == null) {
|
||||
synchronized (this) {
|
||||
if (httpClient == null) {
|
||||
Duration connect = defaultDuration(properties.getConnectTimeout(), 10);
|
||||
Duration read = defaultDuration(properties.getReadTimeout(), 60);
|
||||
httpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(connect.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.readTimeout(read.toMillis(), TimeUnit.MILLISECONDS)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
}
|
||||
return httpClient;
|
||||
}
|
||||
|
||||
private Duration defaultDuration(Duration duration, long seconds) {
|
||||
return duration == null ? Duration.ofSeconds(seconds) : duration;
|
||||
}
|
||||
|
||||
private ServiceException buildException(String body) {
|
||||
try {
|
||||
JsonNode root = objectMapper.readTree(body);
|
||||
String message = root.path("message").asText(body);
|
||||
return exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), message);
|
||||
} catch (Exception ignored) {
|
||||
return exception0(LATENTSYNC_SUBMIT_FAILED.getCode(), body);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,54 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice TTS 请求
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class CosyVoiceTtsRequest {
|
||||
|
||||
/**
|
||||
* 待合成文本
|
||||
*/
|
||||
private String text;
|
||||
|
||||
/**
|
||||
* 声音 ID(可选,默认使用配置)
|
||||
*/
|
||||
private String voiceId;
|
||||
|
||||
/**
|
||||
* 模型(默认 cosyvoice-v2)
|
||||
*/
|
||||
private String model;
|
||||
|
||||
/**
|
||||
* 语速
|
||||
*/
|
||||
private Float speechRate;
|
||||
|
||||
/**
|
||||
* 音量,可选
|
||||
*/
|
||||
private Float volume;
|
||||
|
||||
/**
|
||||
* 采样率
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频格式
|
||||
*/
|
||||
private String audioFormat;
|
||||
|
||||
/**
|
||||
* 是否仅用于试听,方便服务侧做限流
|
||||
*/
|
||||
private boolean preview;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* CosyVoice TTS 响应
|
||||
*/
|
||||
@Data
|
||||
public class CosyVoiceTtsResult {
|
||||
|
||||
/**
|
||||
* 请求ID
|
||||
*/
|
||||
private String requestId;
|
||||
|
||||
/**
|
||||
* 返回的音频格式
|
||||
*/
|
||||
private String format;
|
||||
|
||||
/**
|
||||
* 采样率
|
||||
*/
|
||||
private Integer sampleRate;
|
||||
|
||||
/**
|
||||
* 音频二进制内容
|
||||
*/
|
||||
private byte[] audio;
|
||||
|
||||
/**
|
||||
* 音频所使用的 voiceId
|
||||
*/
|
||||
private String voiceId;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* Latentsync 任务提交请求
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class LatentsyncSubmitRequest {
|
||||
|
||||
/**
|
||||
* 音频地址(必填)
|
||||
*/
|
||||
private String audioUrl;
|
||||
|
||||
/**
|
||||
* 视频地址(必填)
|
||||
*/
|
||||
private String videoUrl;
|
||||
|
||||
/**
|
||||
* 口型约束力度(1-2)
|
||||
*/
|
||||
private Integer guidanceScale;
|
||||
|
||||
/**
|
||||
* 随机种子
|
||||
*/
|
||||
private Integer seed;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.client.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Latentsync 任务提交响应
|
||||
*/
|
||||
@Data
|
||||
public class LatentsyncSubmitResponse {
|
||||
|
||||
/**
|
||||
* 日志内容(官方暂未返回,预留)
|
||||
*/
|
||||
private Object logs;
|
||||
|
||||
/**
|
||||
* 指标信息
|
||||
*/
|
||||
private Map<String, Object> metrics;
|
||||
|
||||
/**
|
||||
* 队列位置
|
||||
*/
|
||||
private Integer queuePosition;
|
||||
|
||||
/**
|
||||
* 任务 ID
|
||||
*/
|
||||
private String requestId;
|
||||
|
||||
/**
|
||||
* 当前状态
|
||||
*/
|
||||
private String status;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* CosyVoice 配置
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "yudao.cosyvoice")
|
||||
public class CosyVoiceProperties {
|
||||
|
||||
/**
|
||||
* DashScope API Key
|
||||
*/
|
||||
private String apiKey;
|
||||
|
||||
/**
|
||||
* 默认模型
|
||||
*/
|
||||
private String defaultModel = "cosyvoice-v2";
|
||||
|
||||
/**
|
||||
* 默认 voiceId(可选)
|
||||
*/
|
||||
private String defaultVoiceId;
|
||||
|
||||
/**
|
||||
* 默认采样率
|
||||
*/
|
||||
private Integer sampleRate = 24000;
|
||||
|
||||
/**
|
||||
* 默认音频格式
|
||||
*/
|
||||
private String audioFormat = "wav";
|
||||
|
||||
/**
|
||||
* 试听默认示例文本
|
||||
*/
|
||||
private String previewText = "您好,欢迎体验专属音色。";
|
||||
|
||||
/**
|
||||
* TTS 接口地址
|
||||
*/
|
||||
private String ttsUrl = "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/speech-synthesis";
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
private Duration connectTimeout = Duration.ofSeconds(10);
|
||||
|
||||
/**
|
||||
* 读取超时时间
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(60);
|
||||
|
||||
/**
|
||||
* 是否启用
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
public boolean isEnabled() {
|
||||
return enabled && StrUtil.isNotBlank(apiKey);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.config;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* Latentsync 接口配置
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "tik.latentsync")
|
||||
public class LatentsyncProperties {
|
||||
|
||||
/**
|
||||
* 302AI API Key(可通过配置覆盖)
|
||||
*/
|
||||
private String apiKey = "ab900d8c94094a90aed3e88cdba785c1";
|
||||
|
||||
/**
|
||||
* 默认海外网关
|
||||
*/
|
||||
private String baseUrl = "https://api.302.ai";
|
||||
|
||||
/**
|
||||
* 默认国内中转网关
|
||||
*/
|
||||
private String domesticBaseUrl = "https://api.302ai.cn";
|
||||
|
||||
/**
|
||||
* 是否优先使用国内网关
|
||||
*/
|
||||
private boolean preferDomestic = false;
|
||||
|
||||
/**
|
||||
* 提交任务路径
|
||||
*/
|
||||
private String submitPath = "/302/submit/latentsync";
|
||||
|
||||
/**
|
||||
* guidance_scale 默认值(1-2)
|
||||
*/
|
||||
private Integer defaultGuidanceScale = 1;
|
||||
|
||||
/**
|
||||
* 随机种子默认值
|
||||
*/
|
||||
private Integer defaultSeed = 8888;
|
||||
|
||||
/**
|
||||
* 连接超时时间
|
||||
*/
|
||||
private Duration connectTimeout = Duration.ofSeconds(10);
|
||||
|
||||
/**
|
||||
* 读取超时时间
|
||||
*/
|
||||
private Duration readTimeout = Duration.ofSeconds(60);
|
||||
|
||||
/**
|
||||
* 是否打开调用
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
public String getSubmitUrl() {
|
||||
String base = preferDomestic ? domesticBaseUrl : baseUrl;
|
||||
return StrUtil.blankToDefault(base, baseUrl) + submitPath;
|
||||
}
|
||||
|
||||
public boolean isEnabled() {
|
||||
return enabled && StrUtil.isNotBlank(apiKey);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.controller;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.service.LatentsyncService;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import jakarta.annotation.Resource;
|
||||
import jakarta.validation.Valid;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
|
||||
|
||||
/**
|
||||
* 用户 App - Latentsync 口型同步
|
||||
*/
|
||||
@Tag(name = "用户 App - Latentsync 口型同步")
|
||||
@RestController
|
||||
@RequestMapping("/api/tik/latentsync")
|
||||
@Validated
|
||||
public class AppTikLatentsyncController {
|
||||
|
||||
@Resource
|
||||
private LatentsyncService latentsyncService;
|
||||
|
||||
@PostMapping("/submit")
|
||||
@Operation(summary = "提交 302AI Latentsync 口型任务")
|
||||
public CommonResult<AppTikLatentsyncSubmitRespVO> submitTask(@Valid @RequestBody AppTikLatentsyncSubmitReqVO reqVO) {
|
||||
return success(latentsyncService.submitTask(reqVO));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,95 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.controller;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
|
||||
import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.service.TikUserVoiceService;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceUpdateReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewRespVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsRespVO;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.Parameter;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
import jakarta.annotation.Resource;
|
||||
import jakarta.validation.Valid;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
|
||||
|
||||
/**
|
||||
* 用户 App - 配音管理 Controller
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Tag(name = "用户 App - 配音管理")
|
||||
@RestController
|
||||
@RequestMapping("/api/tik/voice")
|
||||
@Validated
|
||||
@Slf4j
|
||||
public class AppTikUserVoiceController {
|
||||
|
||||
@Resource
|
||||
private TikUserVoiceService voiceService;
|
||||
|
||||
@PostMapping("/create")
|
||||
@Operation(summary = "创建配音")
|
||||
public CommonResult<Long> createVoice(@Valid @RequestBody AppTikUserVoiceCreateReqVO createReqVO) {
|
||||
return success(voiceService.createVoice(createReqVO));
|
||||
}
|
||||
|
||||
@PutMapping("/update")
|
||||
@Operation(summary = "更新配音")
|
||||
public CommonResult<Boolean> updateVoice(@Valid @RequestBody AppTikUserVoiceUpdateReqVO updateReqVO) {
|
||||
voiceService.updateVoice(updateReqVO);
|
||||
return success(true);
|
||||
}
|
||||
|
||||
@DeleteMapping("/delete")
|
||||
@Operation(summary = "删除配音")
|
||||
@Parameter(name = "id", description = "配音编号", required = true, example = "1")
|
||||
public CommonResult<Boolean> deleteVoice(@RequestParam("id") Long id) {
|
||||
voiceService.deleteVoice(id);
|
||||
return success(true);
|
||||
}
|
||||
|
||||
@GetMapping("/page")
|
||||
@Operation(summary = "分页查询配音列表")
|
||||
public CommonResult<PageResult<AppTikUserVoiceRespVO>> getVoicePage(@Valid AppTikUserVoicePageReqVO pageReqVO) {
|
||||
return success(voiceService.getVoicePage(pageReqVO));
|
||||
}
|
||||
|
||||
@GetMapping("/get")
|
||||
@Operation(summary = "获取单个配音")
|
||||
@Parameter(name = "id", description = "配音编号", required = true, example = "1")
|
||||
public CommonResult<AppTikUserVoiceRespVO> getVoice(@RequestParam("id") Long id) {
|
||||
return success(voiceService.getVoice(id));
|
||||
}
|
||||
|
||||
@PostMapping("/transcribe")
|
||||
@Operation(summary = "手动触发语音识别")
|
||||
@Parameter(name = "id", description = "配音编号", required = true, example = "1")
|
||||
public CommonResult<Boolean> transcribeVoice(@RequestParam("id") Long id) {
|
||||
voiceService.transcribeVoice(id);
|
||||
return success(true);
|
||||
}
|
||||
|
||||
@PostMapping("/tts")
|
||||
@Operation(summary = "CosyVoice 文本转语音")
|
||||
public CommonResult<AppTikVoiceTtsRespVO> synthesizeVoice(@Valid @RequestBody AppTikVoiceTtsReqVO reqVO) {
|
||||
return success(voiceService.synthesizeVoice(reqVO));
|
||||
}
|
||||
|
||||
@PostMapping("/preview")
|
||||
@Operation(summary = "我的音色试听")
|
||||
public CommonResult<AppTikVoicePreviewRespVO> previewVoice(@Valid @RequestBody AppTikVoicePreviewReqVO reqVO) {
|
||||
return success(voiceService.previewVoice(reqVO));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.dal.dataobject;
|
||||
|
||||
import cn.iocoder.yudao.framework.tenant.core.db.TenantBaseDO;
|
||||
import com.baomidou.mybatisplus.annotation.KeySequence;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.*;
|
||||
|
||||
/**
|
||||
* 用户配音 DO
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@TableName("tik_user_voice")
|
||||
@KeySequence("tik_user_voice_seq") // 用于 Oracle、PostgreSQL、Kingbase、DB2、H2 数据库的主键自增。如果是 MySQL 等数据库,可不写。
|
||||
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
@ToString(callSuper = true)
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class TikUserVoiceDO extends TenantBaseDO {
|
||||
|
||||
/**
|
||||
* 配音编号
|
||||
*/
|
||||
@TableId
|
||||
private Long id;
|
||||
/**
|
||||
* 用户编号
|
||||
*/
|
||||
private Long userId;
|
||||
/**
|
||||
* 配音名称
|
||||
*/
|
||||
private String name;
|
||||
/**
|
||||
* 音频文件编号(关联 infra_file.id)
|
||||
*/
|
||||
private Long fileId;
|
||||
/**
|
||||
* 语音识别内容,为空表示未识别,有值表示已识别
|
||||
*/
|
||||
private String transcription;
|
||||
/**
|
||||
* 语言:zh-CN-简体中文,zh-TW-繁體中文,en-US-English
|
||||
*/
|
||||
private String language;
|
||||
/**
|
||||
* 音色类型:female-女声,male-男声
|
||||
*/
|
||||
private String gender;
|
||||
/**
|
||||
* 备注信息
|
||||
*/
|
||||
private String note;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.dal.mysql;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
/**
|
||||
* 用户配音 Mapper
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Mapper
|
||||
public interface TikUserVoiceMapper extends BaseMapperX<TikUserVoiceDO> {
|
||||
|
||||
default PageResult<TikUserVoiceDO> selectPage(AppTikUserVoicePageReqVO reqVO) {
|
||||
return selectPage(reqVO, new LambdaQueryWrapperX<TikUserVoiceDO>()
|
||||
.eqIfPresent(TikUserVoiceDO::getUserId, reqVO.getUserId())
|
||||
.likeIfPresent(TikUserVoiceDO::getName, reqVO.getName())
|
||||
.orderByDesc(TikUserVoiceDO::getId));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.service;
|
||||
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
|
||||
|
||||
/**
|
||||
* Latentsync 口型同步 Service
|
||||
*/
|
||||
public interface LatentsyncService {
|
||||
|
||||
/**
|
||||
* 提交 302AI Latentsync 任务
|
||||
*
|
||||
* @param reqVO 请求 VO
|
||||
* @return 任务响应
|
||||
*/
|
||||
AppTikLatentsyncSubmitRespVO submitTask(AppTikLatentsyncSubmitReqVO reqVO);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.service;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.LatentsyncClient;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.LatentsyncSubmitResponse;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikLatentsyncSubmitRespVO;
|
||||
import jakarta.validation.Valid;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
|
||||
/**
|
||||
* Latentsync Service 实现
|
||||
*/
|
||||
@Service
|
||||
@Validated
|
||||
@RequiredArgsConstructor
|
||||
public class LatentsyncServiceImpl implements LatentsyncService {
|
||||
|
||||
private final LatentsyncClient latentsyncClient;
|
||||
|
||||
@Override
|
||||
public AppTikLatentsyncSubmitRespVO submitTask(@Valid AppTikLatentsyncSubmitReqVO reqVO) {
|
||||
LatentsyncSubmitRequest request = LatentsyncSubmitRequest.builder()
|
||||
.audioUrl(StrUtil.trim(reqVO.getAudioUrl()))
|
||||
.videoUrl(StrUtil.trim(reqVO.getVideoUrl()))
|
||||
.guidanceScale(reqVO.getGuidanceScale())
|
||||
.seed(reqVO.getSeed())
|
||||
.build();
|
||||
|
||||
LatentsyncSubmitResponse response = latentsyncClient.submitTask(request);
|
||||
AppTikLatentsyncSubmitRespVO respVO = new AppTikLatentsyncSubmitRespVO();
|
||||
respVO.setRequestId(response.getRequestId());
|
||||
respVO.setStatus(response.getStatus());
|
||||
respVO.setQueuePosition(response.getQueuePosition());
|
||||
return respVO;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.service;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceUpdateReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewRespVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsRespVO;
|
||||
|
||||
/**
|
||||
* 用户配音 Service 接口
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
public interface TikUserVoiceService {
|
||||
|
||||
/**
|
||||
* 创建配音(上传文件 + 可选自动识别)
|
||||
*
|
||||
* @param createReqVO 创建请求 VO
|
||||
* @return 配音编号
|
||||
*/
|
||||
Long createVoice(AppTikUserVoiceCreateReqVO createReqVO);
|
||||
|
||||
/**
|
||||
* 更新配音信息
|
||||
*
|
||||
* @param updateReqVO 更新请求 VO
|
||||
*/
|
||||
void updateVoice(AppTikUserVoiceUpdateReqVO updateReqVO);
|
||||
|
||||
/**
|
||||
* 删除配音
|
||||
*
|
||||
* @param id 配音编号
|
||||
*/
|
||||
void deleteVoice(Long id);
|
||||
|
||||
/**
|
||||
* 分页查询
|
||||
*
|
||||
* @param pageReqVO 分页查询条件
|
||||
* @return 配音列表
|
||||
*/
|
||||
PageResult<AppTikUserVoiceRespVO> getVoicePage(AppTikUserVoicePageReqVO pageReqVO);
|
||||
|
||||
/**
|
||||
* 获取单个配音
|
||||
*
|
||||
* @param id 配音编号
|
||||
* @return 配音信息
|
||||
*/
|
||||
AppTikUserVoiceRespVO getVoice(Long id);
|
||||
|
||||
/**
|
||||
* 手动触发语音识别
|
||||
*
|
||||
* @param id 配音编号
|
||||
*/
|
||||
void transcribeVoice(Long id);
|
||||
|
||||
/**
|
||||
* CosyVoice 文本转语音
|
||||
*/
|
||||
AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO);
|
||||
|
||||
/**
|
||||
* 我的音色试听
|
||||
*/
|
||||
AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,864 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.service;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.http.HttpUtil;
|
||||
import cn.hutool.json.JSONArray;
|
||||
import cn.hutool.json.JSONObject;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import cn.iocoder.yudao.framework.common.pojo.CommonResult;
|
||||
import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
|
||||
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
||||
import cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUtils;
|
||||
import cn.iocoder.yudao.module.infra.api.file.FileApi;
|
||||
import cn.iocoder.yudao.module.infra.dal.dataobject.file.FileDO;
|
||||
import cn.iocoder.yudao.module.infra.dal.mysql.file.FileMapper;
|
||||
import cn.iocoder.yudao.module.tik.file.dal.dataobject.TikUserFileDO;
|
||||
import cn.iocoder.yudao.module.tik.file.dal.mysql.TikUserFileMapper;
|
||||
import cn.iocoder.yudao.module.tik.file.service.TikUserFileService;
|
||||
import cn.iocoder.yudao.module.tik.tikhup.service.TikHupService;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.CosyVoiceClient;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsRequest;
|
||||
import cn.iocoder.yudao.module.tik.voice.client.dto.CosyVoiceTtsResult;
|
||||
import cn.iocoder.yudao.module.tik.voice.config.CosyVoiceProperties;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.dataobject.TikUserVoiceDO;
|
||||
import cn.iocoder.yudao.module.tik.voice.dal.mysql.TikUserVoiceMapper;
|
||||
import cn.iocoder.yudao.module.tik.voice.util.ByteArrayMultipartFile;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceCreateReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoicePageReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceRespVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikUserVoiceUpdateReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoicePreviewRespVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsReqVO;
|
||||
import cn.iocoder.yudao.module.tik.voice.vo.AppTikVoiceTtsRespVO;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.redis.core.StringRedisTemplate;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
|
||||
import jakarta.annotation.Resource;
|
||||
import java.util.Arrays;
|
||||
import java.util.Base64;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||
import static cn.iocoder.yudao.module.tik.enmus.ErrorCodeConstants.*;
|
||||
|
||||
/**
|
||||
* 用户配音 Service 实现类
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Service
|
||||
@Validated
|
||||
@Slf4j
|
||||
public class TikUserVoiceServiceImpl implements TikUserVoiceService {
|
||||
|
||||
@Resource
|
||||
private TikUserVoiceMapper voiceMapper;
|
||||
|
||||
@Resource
|
||||
private FileMapper fileMapper;
|
||||
|
||||
@Resource
|
||||
private TikUserFileMapper userFileMapper;
|
||||
|
||||
@Resource
|
||||
private TikUserFileService tikUserFileService;
|
||||
|
||||
@Resource
|
||||
private FileApi fileApi;
|
||||
|
||||
@Resource
|
||||
private TikHupService tikHupService;
|
||||
|
||||
@Resource
|
||||
private CosyVoiceClient cosyVoiceClient;
|
||||
|
||||
@Resource
|
||||
private CosyVoiceProperties cosyVoiceProperties;
|
||||
|
||||
@Resource
|
||||
private StringRedisTemplate stringRedisTemplate;
|
||||
|
||||
/** 预签名URL过期时间(1小时,单位:秒) */
|
||||
private static final int PRESIGN_URL_EXPIRATION_SECONDS = 3600;
|
||||
private static final String PREVIEW_CACHE_PREFIX = "tik:voice:preview:";
|
||||
private static final String SYNTH_CACHE_PREFIX = "tik:voice:tts:";
|
||||
private static final long PREVIEW_CACHE_TTL_SECONDS = 3600;
|
||||
private static final long SYNTH_CACHE_TTL_SECONDS = 24 * 3600;
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public Long createVoice(AppTikUserVoiceCreateReqVO createReqVO) {
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
|
||||
// 1. 校验文件是否存在且属于voice分类
|
||||
FileDO fileDO = fileMapper.selectById(createReqVO.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 验证文件分类是否为voice(通过tik_user_file表查询)
|
||||
TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX<TikUserFileDO>()
|
||||
.eq(TikUserFileDO::getFileId, createReqVO.getFileId())
|
||||
.eq(TikUserFileDO::getFileCategory, "voice")
|
||||
.eq(TikUserFileDO::getUserId, userId));
|
||||
if (userFile == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS, "文件不存在或不属于voice分类");
|
||||
}
|
||||
|
||||
// 2. 校验名称是否重复
|
||||
TikUserVoiceDO existingVoice = voiceMapper.selectOne(new LambdaQueryWrapperX<TikUserVoiceDO>()
|
||||
.eq(TikUserVoiceDO::getUserId, userId)
|
||||
.eq(TikUserVoiceDO::getName, createReqVO.getName())
|
||||
.eq(TikUserVoiceDO::getDeleted, false));
|
||||
if (existingVoice != null) {
|
||||
throw exception(VOICE_NAME_DUPLICATE);
|
||||
}
|
||||
|
||||
// 3. 创建配音记录
|
||||
TikUserVoiceDO voice = new TikUserVoiceDO()
|
||||
.setUserId(userId)
|
||||
.setName(createReqVO.getName())
|
||||
.setFileId(createReqVO.getFileId())
|
||||
.setLanguage(StrUtil.blankToDefault(createReqVO.getLanguage(), "zh-CN"))
|
||||
.setGender(StrUtil.blankToDefault(createReqVO.getGender(), "female"))
|
||||
.setNote(createReqVO.getNote())
|
||||
.setTranscription(null); // 初始为空,表示未识别
|
||||
voiceMapper.insert(voice);
|
||||
|
||||
// 4. 如果开启自动识别,异步执行识别
|
||||
if (Boolean.TRUE.equals(createReqVO.getAutoTranscribe())) {
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
log.info("[createVoice][开启自动识别,配音编号({}),文件ID({}),预签名URL({})]",
|
||||
voice.getId(), fileDO.getId(), fileAccessUrl);
|
||||
asyncTranscribeVoice(voice.getId(), fileAccessUrl);
|
||||
}
|
||||
|
||||
log.info("[createVoice][用户({})创建配音成功,配音编号({})]", userId, voice.getId());
|
||||
return voice.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public void updateVoice(AppTikUserVoiceUpdateReqVO updateReqVO) {
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
|
||||
// 1. 校验配音是否存在且属于当前用户
|
||||
TikUserVoiceDO voice = voiceMapper.selectById(updateReqVO.getId());
|
||||
if (voice == null || !voice.getUserId().equals(userId)) {
|
||||
throw exception(VOICE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 2. 如果更新名称,校验名称是否重复
|
||||
if (StrUtil.isNotBlank(updateReqVO.getName()) && !updateReqVO.getName().equals(voice.getName())) {
|
||||
TikUserVoiceDO existingVoice = voiceMapper.selectOne(new LambdaQueryWrapperX<TikUserVoiceDO>()
|
||||
.eq(TikUserVoiceDO::getUserId, userId)
|
||||
.eq(TikUserVoiceDO::getName, updateReqVO.getName())
|
||||
.eq(TikUserVoiceDO::getDeleted, false)
|
||||
.ne(TikUserVoiceDO::getId, updateReqVO.getId()));
|
||||
if (existingVoice != null) {
|
||||
throw exception(VOICE_NAME_DUPLICATE);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 更新配音信息
|
||||
TikUserVoiceDO updateObj = new TikUserVoiceDO()
|
||||
.setId(updateReqVO.getId());
|
||||
if (StrUtil.isNotBlank(updateReqVO.getName())) {
|
||||
updateObj.setName(updateReqVO.getName());
|
||||
}
|
||||
if (StrUtil.isNotBlank(updateReqVO.getLanguage())) {
|
||||
updateObj.setLanguage(updateReqVO.getLanguage());
|
||||
}
|
||||
if (StrUtil.isNotBlank(updateReqVO.getGender())) {
|
||||
updateObj.setGender(updateReqVO.getGender());
|
||||
}
|
||||
if (updateReqVO.getNote() != null) {
|
||||
updateObj.setNote(updateReqVO.getNote());
|
||||
}
|
||||
if (updateReqVO.getTranscription() != null) {
|
||||
updateObj.setTranscription(updateReqVO.getTranscription());
|
||||
}
|
||||
voiceMapper.updateById(updateObj);
|
||||
|
||||
log.info("[updateVoice][用户({})更新配音成功,配音编号({})]", userId, updateReqVO.getId());
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public void deleteVoice(Long id) {
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
|
||||
// 1. 校验配音是否存在且属于当前用户
|
||||
TikUserVoiceDO voice = voiceMapper.selectById(id);
|
||||
if (voice == null || !voice.getUserId().equals(userId)) {
|
||||
throw exception(VOICE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 2. 删除音频文件(含OSS)
|
||||
TikUserFileDO userFile = userFileMapper.selectOne(new LambdaQueryWrapperX<TikUserFileDO>()
|
||||
.eq(TikUserFileDO::getFileId, voice.getFileId())
|
||||
.eq(TikUserFileDO::getUserId, userId));
|
||||
if (userFile != null) {
|
||||
tikUserFileService.deleteFiles(Collections.singletonList(userFile.getId()));
|
||||
}
|
||||
|
||||
// 3. 逻辑删除配音记录
|
||||
voiceMapper.deleteById(id);
|
||||
|
||||
log.info("[deleteVoice][用户({})删除配音成功,配音编号({})]", userId, id);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PageResult<AppTikUserVoiceRespVO> getVoicePage(AppTikUserVoicePageReqVO pageReqVO) {
|
||||
// 自动填充当前登录用户ID
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
pageReqVO.setUserId(userId);
|
||||
|
||||
// 查询配音列表
|
||||
PageResult<TikUserVoiceDO> pageResult = voiceMapper.selectPage(pageReqVO);
|
||||
|
||||
// 批量查询文件信息,避免 N+1 查询
|
||||
Map<Long, FileDO> fileMap = new HashMap<>();
|
||||
if (CollUtil.isNotEmpty(pageResult.getList())) {
|
||||
List<Long> fileIds = pageResult.getList().stream()
|
||||
.map(TikUserVoiceDO::getFileId)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (CollUtil.isNotEmpty(fileIds)) {
|
||||
List<FileDO> files = fileMapper.selectBatchIds(fileIds);
|
||||
Map<Long, FileDO> tempFileMap = files.stream()
|
||||
.collect(Collectors.toMap(FileDO::getId, file -> file));
|
||||
fileMap.putAll(tempFileMap);
|
||||
}
|
||||
}
|
||||
|
||||
// 转换为VO并关联查询文件信息
|
||||
return CollectionUtils.convertPage(pageResult, voice -> {
|
||||
AppTikUserVoiceRespVO vo = BeanUtils.toBean(voice, AppTikUserVoiceRespVO.class);
|
||||
|
||||
// 通过 file_id 关联查询文件URL,并生成预签名URL
|
||||
FileDO fileDO = fileMap.get(voice.getFileId());
|
||||
if (fileDO != null) {
|
||||
// 生成预签名URL(1小时有效期)
|
||||
String presignedUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
vo.setFileUrl(presignedUrl);
|
||||
}
|
||||
|
||||
return vo;
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public AppTikUserVoiceRespVO getVoice(Long id) {
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
|
||||
// 1. 查询配音
|
||||
TikUserVoiceDO voice = voiceMapper.selectById(id);
|
||||
if (voice == null || !voice.getUserId().equals(userId)) {
|
||||
throw exception(VOICE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 2. 转换为VO并关联查询文件信息
|
||||
AppTikUserVoiceRespVO vo = BeanUtils.toBean(voice, AppTikUserVoiceRespVO.class);
|
||||
|
||||
// 通过 file_id 关联查询文件URL,并生成预签名URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO != null) {
|
||||
// 生成预签名URL(1小时有效期)
|
||||
String presignedUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
vo.setFileUrl(presignedUrl);
|
||||
}
|
||||
|
||||
return vo;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public void transcribeVoice(Long id) {
|
||||
Long userId = SecurityFrameworkUtils.getLoginUserId();
|
||||
|
||||
// 1. 校验配音是否存在且属于当前用户
|
||||
TikUserVoiceDO voice = voiceMapper.selectById(id);
|
||||
if (voice == null || !voice.getUserId().equals(userId)) {
|
||||
throw exception(VOICE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 2. 获取文件URL
|
||||
FileDO fileDO = fileMapper.selectById(voice.getFileId());
|
||||
if (fileDO == null) {
|
||||
throw exception(VOICE_FILE_NOT_EXISTS);
|
||||
}
|
||||
|
||||
// 3. 异步执行识别
|
||||
String fileAccessUrl = fileApi.presignGetUrl(fileDO.getUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
asyncTranscribeVoice(id, fileAccessUrl);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AppTikVoiceTtsRespVO synthesizeVoice(AppTikVoiceTtsReqVO reqVO) {
|
||||
String finalText = determineSynthesisText(
|
||||
reqVO.getTranscriptionText(),
|
||||
reqVO.getInputText(),
|
||||
false);
|
||||
finalText = appendEmotion(finalText, reqVO.getEmotion());
|
||||
|
||||
String cacheKey = buildCacheKey(SYNTH_CACHE_PREFIX,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getFileUrl(),
|
||||
finalText,
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
reqVO.getEmotion(),
|
||||
reqVO.getAudioFormat(),
|
||||
reqVO.getSampleRate());
|
||||
|
||||
SynthCacheEntry synthCache = getSynthCache(cacheKey);
|
||||
if (synthCache != null) {
|
||||
return buildSynthResponseFromCache(reqVO, synthCache);
|
||||
}
|
||||
|
||||
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
|
||||
finalText,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getModel(),
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
reqVO.getSampleRate(),
|
||||
reqVO.getAudioFormat(),
|
||||
false
|
||||
));
|
||||
|
||||
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
|
||||
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
|
||||
"file",
|
||||
buildFileName(voiceId, format),
|
||||
resolveContentType(format),
|
||||
ttsResult.getAudio()
|
||||
);
|
||||
Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
|
||||
|
||||
AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
|
||||
respVO.setFileId(fileId);
|
||||
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
|
||||
respVO.setFormat(format);
|
||||
respVO.setSampleRate(ttsResult.getSampleRate());
|
||||
respVO.setRequestId(ttsResult.getRequestId());
|
||||
respVO.setVoiceId(voiceId);
|
||||
|
||||
saveSynthCache(cacheKey, new SynthCacheEntry(
|
||||
Base64.getEncoder().encodeToString(ttsResult.getAudio()),
|
||||
format,
|
||||
ttsResult.getSampleRate(),
|
||||
ttsResult.getRequestId(),
|
||||
voiceId
|
||||
));
|
||||
return respVO;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AppTikVoicePreviewRespVO previewVoice(AppTikVoicePreviewReqVO reqVO) {
|
||||
String finalText = determineSynthesisText(
|
||||
reqVO.getTranscriptionText(),
|
||||
reqVO.getInputText(),
|
||||
true);
|
||||
finalText = appendEmotion(finalText, reqVO.getEmotion());
|
||||
|
||||
String cacheKey = buildCacheKey(PREVIEW_CACHE_PREFIX,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getFileUrl(),
|
||||
finalText,
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
reqVO.getEmotion(),
|
||||
reqVO.getAudioFormat(),
|
||||
null);
|
||||
PreviewCacheEntry previewCache = getPreviewCache(cacheKey);
|
||||
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
|
||||
if (previewCache != null) {
|
||||
String cachedUrl = fileApi.presignGetUrl(previewCache.getFileUrl(), PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
return buildPreviewResp(previewCache, cachedUrl, voiceId);
|
||||
}
|
||||
|
||||
CosyVoiceTtsResult ttsResult = cosyVoiceClient.synthesize(buildTtsRequest(
|
||||
finalText,
|
||||
reqVO.getVoiceId(),
|
||||
reqVO.getModel(),
|
||||
reqVO.getSpeechRate(),
|
||||
reqVO.getVolume(),
|
||||
null,
|
||||
reqVO.getAudioFormat(),
|
||||
true
|
||||
));
|
||||
|
||||
String format = defaultFormat(ttsResult.getFormat(), reqVO.getAudioFormat());
|
||||
voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cosyVoiceProperties.getDefaultVoiceId());
|
||||
String objectName = buildFileName(voiceId, format);
|
||||
String fileUrl = fileApi.createFile(ttsResult.getAudio(), objectName, "voice/preview", resolveContentType(format));
|
||||
String presignUrl = fileApi.presignGetUrl(fileUrl, PRESIGN_URL_EXPIRATION_SECONDS);
|
||||
|
||||
PreviewCacheEntry entry = new PreviewCacheEntry(fileUrl, format, ttsResult.getSampleRate(), ttsResult.getRequestId());
|
||||
savePreviewCache(cacheKey, entry);
|
||||
return buildPreviewResp(entry, presignUrl, voiceId);
|
||||
}
|
||||
|
||||
private CosyVoiceTtsRequest buildTtsRequest(String text,
|
||||
String voiceId,
|
||||
String model,
|
||||
Float speechRate,
|
||||
Float volume,
|
||||
Integer sampleRate,
|
||||
String audioFormat,
|
||||
boolean preview) {
|
||||
return CosyVoiceTtsRequest.builder()
|
||||
.text(text)
|
||||
.voiceId(voiceId)
|
||||
.model(model)
|
||||
.speechRate(speechRate)
|
||||
.volume(volume)
|
||||
.sampleRate(sampleRate)
|
||||
.audioFormat(audioFormat)
|
||||
.preview(preview)
|
||||
.build();
|
||||
}
|
||||
|
||||
private String defaultFormat(String responseFormat, String requestFormat) {
|
||||
return StrUtil.blankToDefault(responseFormat,
|
||||
StrUtil.blankToDefault(requestFormat, cosyVoiceProperties.getAudioFormat()));
|
||||
}
|
||||
|
||||
private String buildFileName(String voiceId, String format) {
|
||||
String safeVoice = StrUtil.blankToDefault(voiceId, "voice")
|
||||
.replaceAll("[^a-zA-Z0-9_-]", "");
|
||||
return safeVoice + "-" + System.currentTimeMillis() + "." + format;
|
||||
}
|
||||
|
||||
private String resolveContentType(String format) {
|
||||
if ("wav".equalsIgnoreCase(format)) {
|
||||
return "audio/wav";
|
||||
}
|
||||
if ("mp3".equalsIgnoreCase(format)) {
|
||||
return "audio/mpeg";
|
||||
}
|
||||
if ("flac".equalsIgnoreCase(format)) {
|
||||
return "audio/flac";
|
||||
}
|
||||
return "audio/mpeg";
|
||||
}
|
||||
|
||||
private String determineSynthesisText(String transcriptionText, String inputText, boolean allowFallback) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
if (StrUtil.isNotBlank(transcriptionText)) {
|
||||
builder.append(transcriptionText.trim());
|
||||
}
|
||||
if (StrUtil.isNotBlank(inputText)) {
|
||||
if (builder.length() > 0) {
|
||||
builder.append("\n");
|
||||
}
|
||||
builder.append(inputText.trim());
|
||||
}
|
||||
if (builder.length() > 0) {
|
||||
return builder.toString();
|
||||
}
|
||||
if (allowFallback) {
|
||||
return cosyVoiceProperties.getPreviewText();
|
||||
}
|
||||
throw exception(VOICE_TTS_FAILED, "请提供需要合成的文本内容");
|
||||
}
|
||||
|
||||
private String appendEmotion(String text, String emotion) {
|
||||
if (StrUtil.isBlank(text)) {
|
||||
return text;
|
||||
}
|
||||
if (StrUtil.isBlank(emotion) || "neutral".equalsIgnoreCase(emotion)) {
|
||||
return text;
|
||||
}
|
||||
String emotionLabel = switch (emotion.toLowerCase()) {
|
||||
case "happy" -> "高兴";
|
||||
case "angry" -> "愤怒";
|
||||
case "sad" -> "悲伤";
|
||||
case "scared" -> "害怕";
|
||||
case "disgusted" -> "厌恶";
|
||||
case "surprised" -> "惊讶";
|
||||
default -> emotion;
|
||||
};
|
||||
return "【情感:" + emotionLabel + "】" + text;
|
||||
}
|
||||
|
||||
private String buildCacheKey(String prefix,
|
||||
String voiceId,
|
||||
String fileUrl,
|
||||
String text,
|
||||
Float speechRate,
|
||||
Float volume,
|
||||
String emotion,
|
||||
String audioFormat,
|
||||
Integer sampleRate) {
|
||||
String identifier = StrUtil.isNotBlank(voiceId)
|
||||
? voiceId
|
||||
: StrUtil.blankToDefault(fileUrl, "no-voice");
|
||||
String payload = StrUtil.join("|",
|
||||
identifier,
|
||||
text,
|
||||
speechRate != null ? speechRate : "1.0",
|
||||
volume != null ? volume : "0",
|
||||
StrUtil.blankToDefault(emotion, "neutral"),
|
||||
StrUtil.blankToDefault(audioFormat, cosyVoiceProperties.getAudioFormat()),
|
||||
sampleRate != null ? sampleRate : cosyVoiceProperties.getSampleRate());
|
||||
String hash = cn.hutool.crypto.SecureUtil.sha256(payload);
|
||||
return prefix + hash;
|
||||
}
|
||||
|
||||
private PreviewCacheEntry getPreviewCache(String key) {
|
||||
try {
|
||||
String json = stringRedisTemplate.opsForValue().get(key);
|
||||
if (StrUtil.isBlank(json)) {
|
||||
return null;
|
||||
}
|
||||
return JSONUtil.toBean(json, PreviewCacheEntry.class);
|
||||
} catch (Exception ex) {
|
||||
log.warn("[previewVoice][cache read failed][key={}]", key, ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private void savePreviewCache(String key, PreviewCacheEntry entry) {
|
||||
try {
|
||||
stringRedisTemplate.opsForValue().set(
|
||||
key,
|
||||
JSONUtil.toJsonStr(entry),
|
||||
PREVIEW_CACHE_TTL_SECONDS,
|
||||
TimeUnit.SECONDS);
|
||||
} catch (Exception ex) {
|
||||
log.warn("[previewVoice][cache write failed][key={}]", key, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private SynthCacheEntry getSynthCache(String key) {
|
||||
try {
|
||||
String json = stringRedisTemplate.opsForValue().get(key);
|
||||
if (StrUtil.isBlank(json)) {
|
||||
return null;
|
||||
}
|
||||
return JSONUtil.toBean(json, SynthCacheEntry.class);
|
||||
} catch (Exception ex) {
|
||||
log.warn("[synthesizeVoice][cache read failed][key={}]", key, ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private void saveSynthCache(String key, SynthCacheEntry entry) {
|
||||
try {
|
||||
stringRedisTemplate.opsForValue().set(
|
||||
key,
|
||||
JSONUtil.toJsonStr(entry),
|
||||
SYNTH_CACHE_TTL_SECONDS,
|
||||
TimeUnit.SECONDS);
|
||||
} catch (Exception ex) {
|
||||
log.warn("[synthesizeVoice][cache write failed][key={}]", key, ex);
|
||||
}
|
||||
}
|
||||
|
||||
private AppTikVoiceTtsRespVO buildSynthResponseFromCache(AppTikVoiceTtsReqVO reqVO, SynthCacheEntry cache) {
|
||||
byte[] audioBytes = Base64.getDecoder().decode(cache.getAudioBase64());
|
||||
String format = defaultFormat(cache.getFormat(), reqVO.getAudioFormat());
|
||||
String voiceId = StrUtil.blankToDefault(reqVO.getVoiceId(), cache.getVoiceId());
|
||||
ByteArrayMultipartFile multipartFile = new ByteArrayMultipartFile(
|
||||
"file",
|
||||
buildFileName(voiceId, format),
|
||||
resolveContentType(format),
|
||||
audioBytes
|
||||
);
|
||||
Long fileId = tikUserFileService.uploadFile(multipartFile, "audio", null);
|
||||
|
||||
AppTikVoiceTtsRespVO respVO = new AppTikVoiceTtsRespVO();
|
||||
respVO.setFileId(fileId);
|
||||
respVO.setAudioUrl(tikUserFileService.getAudioPlayUrl(fileId));
|
||||
respVO.setFormat(format);
|
||||
respVO.setSampleRate(cache.getSampleRate());
|
||||
respVO.setRequestId(cache.getRequestId());
|
||||
respVO.setVoiceId(voiceId);
|
||||
return respVO;
|
||||
}
|
||||
|
||||
private AppTikVoicePreviewRespVO buildPreviewResp(PreviewCacheEntry entry, String presignUrl, String voiceId) {
|
||||
AppTikVoicePreviewRespVO respVO = new AppTikVoicePreviewRespVO();
|
||||
respVO.setAudioUrl(presignUrl);
|
||||
respVO.setFormat(entry.getFormat());
|
||||
respVO.setSampleRate(entry.getSampleRate());
|
||||
respVO.setRequestId(entry.getRequestId());
|
||||
respVO.setVoiceId(voiceId);
|
||||
return respVO;
|
||||
}
|
||||
|
||||
private static class PreviewCacheEntry {
|
||||
private String fileUrl;
|
||||
private String format;
|
||||
private Integer sampleRate;
|
||||
private String requestId;
|
||||
|
||||
public PreviewCacheEntry() {}
|
||||
|
||||
public PreviewCacheEntry(String fileUrl, String format, Integer sampleRate, String requestId) {
|
||||
this.fileUrl = fileUrl;
|
||||
this.format = format;
|
||||
this.sampleRate = sampleRate;
|
||||
this.requestId = requestId;
|
||||
}
|
||||
|
||||
public String getFileUrl() {
|
||||
return fileUrl;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public Integer getSampleRate() {
|
||||
return sampleRate;
|
||||
}
|
||||
|
||||
public String getRequestId() {
|
||||
return requestId;
|
||||
}
|
||||
}
|
||||
|
||||
private static class SynthCacheEntry {
|
||||
private String audioBase64;
|
||||
private String format;
|
||||
private Integer sampleRate;
|
||||
private String requestId;
|
||||
private String voiceId;
|
||||
|
||||
public SynthCacheEntry() {}
|
||||
|
||||
public SynthCacheEntry(String audioBase64, String format, Integer sampleRate, String requestId, String voiceId) {
|
||||
this.audioBase64 = audioBase64;
|
||||
this.format = format;
|
||||
this.sampleRate = sampleRate;
|
||||
this.requestId = requestId;
|
||||
this.voiceId = voiceId;
|
||||
}
|
||||
|
||||
public String getAudioBase64() {
|
||||
return audioBase64;
|
||||
}
|
||||
|
||||
public String getFormat() {
|
||||
return format;
|
||||
}
|
||||
|
||||
public Integer getSampleRate() {
|
||||
return sampleRate;
|
||||
}
|
||||
|
||||
public String getRequestId() {
|
||||
return requestId;
|
||||
}
|
||||
|
||||
public String getVoiceId() {
|
||||
return voiceId;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 异步执行语音识别
|
||||
*
|
||||
* @param voiceId 配音编号
|
||||
* @param fileUrl 文件URL
|
||||
*/
|
||||
@Async
|
||||
public void asyncTranscribeVoice(Long voiceId, String fileUrl) {
|
||||
try {
|
||||
log.info("[asyncTranscribeVoice][开始识别,配音编号({}),文件URL({})]", voiceId, fileUrl);
|
||||
Object result = tikHupService.videoToCharacters2(Collections.singletonList(fileUrl));
|
||||
|
||||
// 解析识别结果
|
||||
String transcription = extractTranscription(result);
|
||||
|
||||
if (StrUtil.isNotBlank(transcription)) {
|
||||
// 更新识别结果
|
||||
TikUserVoiceDO updateObj = new TikUserVoiceDO()
|
||||
.setId(voiceId)
|
||||
.setTranscription(transcription);
|
||||
voiceMapper.updateById(updateObj);
|
||||
log.info("[asyncTranscribeVoice][识别成功,配音编号({}),文本长度({})]", voiceId, transcription.length());
|
||||
} else {
|
||||
log.warn("[asyncTranscribeVoice][识别结果为空,配音编号({}),返回码({})]",
|
||||
voiceId, result instanceof CommonResult ? ((CommonResult<?>) result).getCode() : "未知");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("[asyncTranscribeVoice][识别失败,配音编号({}),文件URL({})]", voiceId, fileUrl, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从识别结果中提取文字内容
|
||||
* 根据 TikHupService.videoToCharacters* 的实际返回格式进行解析
|
||||
*
|
||||
* @param result 识别结果
|
||||
* @return 文字内容
|
||||
*/
|
||||
private String extractTranscription(Object result) {
|
||||
if (result == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
if (result instanceof CommonResult<?> commonResult) {
|
||||
if (!commonResult.isSuccess()) {
|
||||
log.warn("[extractTranscription][识别失败,code({}),msg({})]",
|
||||
commonResult.getCode(), commonResult.getMsg());
|
||||
return null;
|
||||
}
|
||||
Object data = commonResult.getData();
|
||||
if (data == null) {
|
||||
return null;
|
||||
}
|
||||
String parsed = parseTranscriptionText(data);
|
||||
if (StrUtil.isNotBlank(parsed)) {
|
||||
return parsed;
|
||||
}
|
||||
return data.toString();
|
||||
}
|
||||
|
||||
String parsed = parseTranscriptionText(result);
|
||||
if (StrUtil.isNotBlank(parsed)) {
|
||||
return parsed;
|
||||
}
|
||||
return result.toString();
|
||||
} catch (Exception e) {
|
||||
log.warn("[extractTranscription][解析识别结果失败]", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static final List<String> TRANSCRIPTION_TEXT_KEYS =
|
||||
Arrays.asList("text", "sentence", "result", "content", "transcript", "output_text", "display_text");
|
||||
|
||||
private String parseTranscriptionText(Object rawData) {
|
||||
if (rawData == null) {
|
||||
return null;
|
||||
}
|
||||
String rawString = rawData instanceof String ? (String) rawData : JSONUtil.toJsonStr(rawData);
|
||||
if (StrUtil.isBlank(rawString)) {
|
||||
return null;
|
||||
}
|
||||
if (!JSONUtil.isTypeJSON(rawString)) {
|
||||
return rawString;
|
||||
}
|
||||
try {
|
||||
Object json = JSONUtil.parse(rawString);
|
||||
String localText = extractTextFromJson(json);
|
||||
if (StrUtil.isNotBlank(localText)) {
|
||||
return localText;
|
||||
}
|
||||
if (json instanceof JSONObject jsonObject) {
|
||||
JSONArray results = jsonObject.getJSONArray("results");
|
||||
if (CollUtil.isEmpty(results)) {
|
||||
return null;
|
||||
}
|
||||
Object lastObj = results.get(results.size() - 1);
|
||||
if (!(lastObj instanceof JSONObject lastResult)) {
|
||||
return null;
|
||||
}
|
||||
String transcriptionUrl = lastResult.getStr("transcription_url");
|
||||
if (StrUtil.isBlank(transcriptionUrl)) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder builder = new StringBuilder();
|
||||
appendRemoteTranscription(builder, transcriptionUrl);
|
||||
return builder.length() > 0 ? builder.toString().trim() : null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("[parseTranscriptionText][解析Paraformer结果失败]", e);
|
||||
}
|
||||
return rawString;
|
||||
}
|
||||
|
||||
private void appendRemoteTranscription(StringBuilder builder, String transcriptionUrl) {
|
||||
if (StrUtil.isBlank(transcriptionUrl)) {
|
||||
return;
|
||||
}
|
||||
String remoteContent = fetchRemoteTranscription(transcriptionUrl);
|
||||
if (StrUtil.isBlank(remoteContent)) {
|
||||
return;
|
||||
}
|
||||
String remoteText = extractTextFromJson(JSONUtil.parse(remoteContent));
|
||||
if (StrUtil.isNotBlank(remoteText)) {
|
||||
appendLine(builder, remoteText);
|
||||
}
|
||||
}
|
||||
|
||||
private String extractTextFromJson(Object json) {
|
||||
if (json == null) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder builder = new StringBuilder();
|
||||
collectTranscriptionText(json, builder);
|
||||
return builder.length() > 0 ? builder.toString().trim() : null;
|
||||
}
|
||||
|
||||
private String fetchRemoteTranscription(String url) {
|
||||
try {
|
||||
String body = HttpUtil.get(url);
|
||||
if (StrUtil.isNotBlank(body)) {
|
||||
return body;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("[fetchRemoteTranscription][下载转写文本失败,url({})]", url, e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void collectTranscriptionText(Object node, StringBuilder builder) {
|
||||
if (node == null) {
|
||||
return;
|
||||
}
|
||||
if (node instanceof JSONObject jsonObject) {
|
||||
for (String key : jsonObject.keySet()) {
|
||||
Object value = jsonObject.get(key);
|
||||
if (value == null) {
|
||||
continue;
|
||||
}
|
||||
if (value instanceof CharSequence && TRANSCRIPTION_TEXT_KEYS.contains(key)) {
|
||||
appendLine(builder, value.toString());
|
||||
} else if (value instanceof JSONObject || value instanceof JSONArray) {
|
||||
collectTranscriptionText(value, builder);
|
||||
}
|
||||
}
|
||||
} else if (node instanceof JSONArray jsonArray) {
|
||||
for (Object item : jsonArray) {
|
||||
collectTranscriptionText(item, builder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void appendLine(StringBuilder builder, String line) {
|
||||
String normalized = StrUtil.trim(line);
|
||||
if (StrUtil.isBlank(normalized)) {
|
||||
return;
|
||||
}
|
||||
if (builder.length() > 0) {
|
||||
builder.append('\n');
|
||||
}
|
||||
builder.append(normalized);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.util;
|
||||
|
||||
import org.springframework.util.FileCopyUtils;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* 仅用于在服务内部上传的内存文件
|
||||
*/
|
||||
public class ByteArrayMultipartFile implements MultipartFile {
|
||||
|
||||
private final String name;
|
||||
private final String originalFilename;
|
||||
private final String contentType;
|
||||
private final byte[] content;
|
||||
|
||||
public ByteArrayMultipartFile(String name, String originalFilename, String contentType, byte[] content) {
|
||||
this.name = name;
|
||||
this.originalFilename = originalFilename;
|
||||
this.contentType = contentType;
|
||||
this.content = content != null ? content : new byte[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getOriginalFilename() {
|
||||
return originalFilename;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
return contentType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isEmpty() {
|
||||
return content.length == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSize() {
|
||||
return content.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getBytes() {
|
||||
return content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputStream getInputStream() {
|
||||
return new ByteArrayInputStream(content);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transferTo(File dest) throws IOException {
|
||||
FileCopyUtils.copy(content, dest);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.Max;
|
||||
import jakarta.validation.constraints.Min;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* Latentsync 提交请求 VO
|
||||
*/
|
||||
@Data
|
||||
public class AppTikLatentsyncSubmitReqVO {
|
||||
|
||||
@Schema(description = "音频 URL(需公网可访问)", requiredMode = Schema.RequiredMode.REQUIRED,
|
||||
example = "https://example.com/audio.wav")
|
||||
@NotBlank(message = "音频地址不能为空")
|
||||
@Size(max = 1024, message = "音频地址长度不能超过 1024 字符")
|
||||
private String audioUrl;
|
||||
|
||||
@Schema(description = "视频 URL(需公网可访问)", requiredMode = Schema.RequiredMode.REQUIRED,
|
||||
example = "https://example.com/video.mp4")
|
||||
@NotBlank(message = "视频地址不能为空")
|
||||
@Size(max = 1024, message = "视频地址长度不能超过 1024 字符")
|
||||
private String videoUrl;
|
||||
|
||||
@Schema(description = "guidance_scale,范围 1-2(默认 1)", example = "1")
|
||||
@Min(value = 1, message = "guidanceScale 不能小于 1")
|
||||
@Max(value = 2, message = "guidanceScale 不能大于 2")
|
||||
private Integer guidanceScale;
|
||||
|
||||
@Schema(description = "随机种子(默认 8888)", example = "8888")
|
||||
private Integer seed;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* Latentsync 提交响应 VO
|
||||
*/
|
||||
@Data
|
||||
public class AppTikLatentsyncSubmitRespVO {
|
||||
|
||||
@Schema(description = "Latentsync 任务 ID", example = "8eed0b9b-6103-4357-a57b-9f135a8c3276")
|
||||
private String requestId;
|
||||
|
||||
@Schema(description = "官方状态,如 IN_QUEUE、PROCESSING、SUCCEEDED", example = "IN_QUEUE")
|
||||
private String status;
|
||||
|
||||
@Schema(description = "当前排队位置", example = "0")
|
||||
private Integer queuePosition;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 用户 App - 创建配音 Request VO
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Schema(description = "用户 App - 创建配音 Request VO")
|
||||
@Data
|
||||
public class AppTikUserVoiceCreateReqVO {
|
||||
|
||||
@Schema(description = "配音名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "我的配音")
|
||||
@NotBlank(message = "配音名称不能为空")
|
||||
private String name;
|
||||
|
||||
@Schema(description = "音频文件编号(关联 infra_file.id)", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
@NotNull(message = "音频文件编号不能为空")
|
||||
private Long fileId;
|
||||
|
||||
@Schema(description = "是否自动识别", example = "false")
|
||||
private Boolean autoTranscribe;
|
||||
|
||||
@Schema(description = "语言:zh-CN-简体中文,zh-TW-繁體中文,en-US-English", example = "zh-CN")
|
||||
private String language;
|
||||
|
||||
@Schema(description = "音色类型:female-女声,male-男声", example = "female")
|
||||
private String gender;
|
||||
|
||||
@Schema(description = "备注", example = "这是一个测试配音")
|
||||
private String note;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.pojo.PageParam;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.ToString;
|
||||
|
||||
/**
|
||||
* 用户 App - 用户配音分页 Request VO
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Schema(description = "用户 App - 用户配音分页 Request VO")
|
||||
@Data
|
||||
public class AppTikUserVoicePageReqVO extends PageParam {
|
||||
|
||||
@Schema(description = "用户编号(自动填充,无需传递)")
|
||||
private Long userId;
|
||||
|
||||
@Schema(description = "配音名称(模糊查询)", example = "我的配音")
|
||||
private String name;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
/**
|
||||
* 用户 App - 用户配音 Response VO
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Schema(description = "用户 App - 用户配音 Response VO")
|
||||
@Data
|
||||
public class AppTikUserVoiceRespVO {
|
||||
|
||||
@Schema(description = "配音编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
private Long id;
|
||||
|
||||
@Schema(description = "配音名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "我的配音")
|
||||
private String name;
|
||||
|
||||
@Schema(description = "音频文件编号(关联 infra_file.id)", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
private Long fileId;
|
||||
|
||||
@Schema(description = "文件访问URL(通过 file_id 关联查询获取)")
|
||||
private String fileUrl;
|
||||
|
||||
@Schema(description = "语音识别内容", example = "这是识别出的文字内容")
|
||||
private String transcription;
|
||||
|
||||
@Schema(description = "语言:zh-CN-简体中文,zh-TW-繁體中文,en-US-English", example = "zh-CN")
|
||||
private String language;
|
||||
|
||||
@Schema(description = "音色类型:female-女声,male-男声", example = "female")
|
||||
private String gender;
|
||||
|
||||
@Schema(description = "备注", example = "这是一个测试配音")
|
||||
private String note;
|
||||
|
||||
@Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime createTime;
|
||||
|
||||
@Schema(description = "更新时间", requiredMode = Schema.RequiredMode.REQUIRED)
|
||||
private LocalDateTime updateTime;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 用户 App - 更新配音 Request VO
|
||||
*
|
||||
* @author 芋道源码
|
||||
*/
|
||||
@Schema(description = "用户 App - 更新配音 Request VO")
|
||||
@Data
|
||||
public class AppTikUserVoiceUpdateReqVO {
|
||||
|
||||
@Schema(description = "配音编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
|
||||
@NotNull(message = "配音编号不能为空")
|
||||
private Long id;
|
||||
|
||||
@Schema(description = "配音名称", example = "我的配音")
|
||||
private String name;
|
||||
|
||||
@Schema(description = "语言:zh-CN-简体中文,zh-TW-繁體中文,en-US-English", example = "zh-CN")
|
||||
private String language;
|
||||
|
||||
@Schema(description = "音色类型:female-女声,male-男声", example = "female")
|
||||
private String gender;
|
||||
|
||||
@Schema(description = "备注", example = "这是一个测试配音")
|
||||
private String note;
|
||||
|
||||
@Schema(description = "识别内容", example = "识别文字,可手动编辑")
|
||||
private String transcription;
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 我的音色试听请求
|
||||
*/
|
||||
@Data
|
||||
public class AppTikVoicePreviewReqVO {
|
||||
|
||||
@Schema(description = "输入文本")
|
||||
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
|
||||
private String inputText;
|
||||
|
||||
@Schema(description = "识别文本,用于拼接")
|
||||
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
|
||||
private String transcriptionText;
|
||||
|
||||
@Schema(description = "音色 ID(CosyVoice voiceId)")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
|
||||
private String fileUrl;
|
||||
|
||||
@Schema(description = "模型名称,默认 cosyvoice-v2")
|
||||
private String model;
|
||||
|
||||
@Schema(description = "语速", example = "1.0")
|
||||
private Float speechRate;
|
||||
|
||||
@Schema(description = "音量", example = "0")
|
||||
private Float volume;
|
||||
|
||||
@Schema(description = "情感", example = "neutral")
|
||||
private String emotion;
|
||||
|
||||
@Schema(description = "音频格式,默认 wav")
|
||||
private String audioFormat;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Schema(description = "音色试听响应")
|
||||
public class AppTikVoicePreviewRespVO {
|
||||
|
||||
@Schema(description = "音频播放地址(预签名 URL)")
|
||||
private String audioUrl;
|
||||
|
||||
@Schema(description = "音频格式", example = "wav")
|
||||
private String format;
|
||||
|
||||
@Schema(description = "采样率", example = "24000")
|
||||
private Integer sampleRate;
|
||||
|
||||
@Schema(description = "CosyVoice 请求ID")
|
||||
private String requestId;
|
||||
|
||||
@Schema(description = "使用的音色 ID")
|
||||
private String voiceId;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import jakarta.validation.constraints.Size;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* 文本转语音请求 VO
|
||||
*/
|
||||
@Data
|
||||
public class AppTikVoiceTtsReqVO {
|
||||
|
||||
@Schema(description = "输入文本")
|
||||
@Size(max = 4000, message = "输入文本不能超过 4000 个字符")
|
||||
private String inputText;
|
||||
|
||||
@Schema(description = "识别文本,用于拼接")
|
||||
@Size(max = 4000, message = "识别文本不能超过 4000 个字符")
|
||||
private String transcriptionText;
|
||||
|
||||
@Schema(description = "音色 ID(CosyVoice voiceId)", example = "cosyvoice-v2-myvoice-xxx")
|
||||
private String voiceId;
|
||||
|
||||
@Schema(description = "音色源音频 OSS 地址(当没有 voiceId 时必传)")
|
||||
private String fileUrl;
|
||||
|
||||
@Schema(description = "模型名称,默认 cosyvoice-v2", example = "cosyvoice-v3")
|
||||
private String model;
|
||||
|
||||
@Schema(description = "语速,默认 1.0", example = "1.0")
|
||||
private Float speechRate;
|
||||
|
||||
@Schema(description = "情感", example = "happy")
|
||||
private String emotion;
|
||||
|
||||
@Schema(description = "音量调节范围 [-10,10]", example = "0")
|
||||
private Float volume;
|
||||
|
||||
@Schema(description = "目标采样率,默认 24000")
|
||||
private Integer sampleRate;
|
||||
|
||||
@Schema(description = "音频格式,默认 wav,可选 mp3")
|
||||
private String audioFormat;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
package cn.iocoder.yudao.module.tik.voice.vo;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Schema(description = "CosyVoice 文本转语音响应")
|
||||
public class AppTikVoiceTtsRespVO {
|
||||
|
||||
@Schema(description = "用户文件编号", example = "1024")
|
||||
private Long fileId;
|
||||
|
||||
@Schema(description = "音频播放地址(预签名 URL)")
|
||||
private String audioUrl;
|
||||
|
||||
@Schema(description = "音频格式", example = "mp3")
|
||||
private String format;
|
||||
|
||||
@Schema(description = "采样率", example = "24000")
|
||||
private Integer sampleRate;
|
||||
|
||||
@Schema(description = "CosyVoice 请求ID")
|
||||
private String requestId;
|
||||
|
||||
@Schema(description = "使用的音色 ID")
|
||||
private String voiceId;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user