语音转文本提取为通用方法

This commit is contained in:
zhonghua1
2026-01-04 22:06:14 +08:00
parent 31561465a8
commit 1167707631

View File

@@ -4,6 +4,7 @@ import com.alibaba.dashscope.audio.asr.transcription.*;
import com.rj.entity.TtsRequestLog;
import com.rj.mapper.TtsRequestLogMapper;
import com.rj.service.ITtsRequestLogService;
import com.rj.service.MinIOService;
import com.rj.utils.MinIOUrlGenerator;
import com.rj.dto.AsrRequest;
import com.rj.dto.AsrResponse;
@@ -12,9 +13,13 @@ import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.mock.web.MockMultipartFile;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.JsonNode;
import java.io.File;
import java.nio.file.Files;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Arrays;
@@ -36,6 +41,9 @@ public class TtsRequestLogServiceImpl implements ITtsRequestLogService {
@Autowired
private MinIOUrlGenerator urlGenerator;
@Autowired
private MinIOService minIOService;
@Value("${dashscope.api.key}")
private String apiKey;
@@ -111,6 +119,175 @@ public class TtsRequestLogServiceImpl implements ITtsRequestLogService {
return false;
}
/**
 * Transcribes audio to text using the default ASR model.
 *
 * <p>Convenience overload: forwards to {@link #transcribeAudioToText(String, String)}
 * with {@code defaultAsrModel} as the model.
 *
 * @param audioPathOrUrl local file path or URL of the audio to transcribe
 * @return the transcribed text, or null when transcription fails
 */
public String transcribeAudioToText(String audioPathOrUrl) {
    final String modelName = defaultAsrModel;
    return transcribeAudioToText(audioPathOrUrl, modelName);
}
/**
 * Transcribes audio to text with the given ASR model.
 *
 * <p>The input is either an http(s) URL or a local file path. A local file is first
 * uploaded through {@code minIOService} and the returned URL is sent to the ASR service.
 * The method submits the transcription task, blocks until it finishes, then downloads
 * and parses each result and joins the extracted texts with single spaces.
 *
 * <p>This method never throws for the failures it anticipates: every failure is logged
 * and reported to the caller as a null return value.
 *
 * @param audioPathOrUrl local file path or http(s) URL of the audio; surrounding
 *                       whitespace is ignored and the URL scheme is matched
 *                       case-insensitively
 * @param model          ASR model name; when null or blank, {@code defaultAsrModel} is used
 * @return the recognized text, or null when the input is blank, the ASR service is
 *         unavailable, the upload or recognition fails, or no text was recognized
 */
public String transcribeAudioToText(String audioPathOrUrl, String model) {
    if (audioPathOrUrl == null || audioPathOrUrl.trim().isEmpty()) {
        log.error("音频文件路径或URL不能为空");
        return null;
    }
    if (!isAsrServiceAvailable()) {
        log.error("ASR服务不可用请检查配置");
        return null;
    }

    // The blank check above trims, so classify and use the trimmed value as well;
    // otherwise " https://..." would be mistaken for a local path.
    String source = audioPathOrUrl.trim();

    try {
        // URI schemes are case-insensitive, so "HTTPS://..." must count as a URL too.
        boolean isRemoteUrl = source.regionMatches(true, 0, "http://", 0, 7)
                || source.regionMatches(true, 0, "https://", 0, 8);

        String audioUrl;
        if (isRemoteUrl) {
            audioUrl = source;
            log.info("使用音频URL: {}", audioUrl);
        } else {
            // The ASR request takes file URLs, so a local file has to be uploaded first.
            audioUrl = uploadLocalAudioForAsr(source);
            if (audioUrl == null) {
                return null;
            }
        }

        // Fall back to the configured default when no model was specified.
        String asrModel = (model != null && !model.trim().isEmpty()) ? model : defaultAsrModel;
        log.info("开始语音识别 - 音频URL: {}, 模型: {}", audioUrl, asrModel);

        TranscriptionParam param = TranscriptionParam.builder()
                .apiKey(apiKey)
                .model(asrModel)
                .fileUrls(Arrays.asList(audioUrl))
                .build();

        // Submit the task, then block until the service reports completion.
        Transcription transcription = new Transcription();
        TranscriptionResult result = transcription.asyncCall(param);
        String taskId = result.getTaskId();
        log.info("ASR任务已提交 - TaskId: {}", taskId);

        TranscriptionQueryParam queryParam = TranscriptionQueryParam.FromTranscriptionParam(param, taskId);
        result = transcription.wait(queryParam);
        log.info("语音识别完成 - TaskId: {}", taskId);

        if (result.getResults() == null || result.getResults().isEmpty()) {
            log.warn("ASR识别结果为空 - TaskId: {}", taskId);
            return null;
        }

        String resultText = collectTranscriptionTextForAsr(result);
        if (resultText.isEmpty()) {
            log.warn("ASR识别结果为空 - TaskId: {}", taskId);
            return null;
        }

        log.info("语音识别完成 - 识别文本长度: {}, TaskId: {}", resultText.length(), taskId);
        return resultText;
    } catch (Exception e) {
        log.error("语音识别失败: {}", e.getMessage(), e);
        return null;
    }
}

/**
 * Uploads a local audio file through {@code minIOService} and returns its URL,
 * or null when the file is missing or the upload fails.
 */
private String uploadLocalAudioForAsr(String localPath) {
    log.info("检测到本地文件路径: {}", localPath);

    // NOTE(review): the path is used as given; if callers ever pass user-controlled
    // input, restrict it to an allowed base directory before reading the file.
    File file = new File(localPath);
    if (!file.exists() || !file.isFile()) {
        log.error("本地文件不存在或不是文件: {}", localPath);
        return null;
    }

    try {
        String fileName = file.getName();
        // NOTE(review): MockMultipartFile comes from Spring's test-support package
        // (org.springframework.mock.web); confirm it is on the runtime classpath.
        MultipartFile multipartFile = new MockMultipartFile(
                "file",
                fileName,
                getContentType(fileName),
                Files.readAllBytes(file.toPath())
        );
        String uploadedUrl = minIOService.uploadFile(multipartFile);
        if (uploadedUrl == null) {
            // Without a URL there is nothing to hand to the ASR service.
            log.error("上传本地文件到MinIO失败: 未返回文件URL");
            return null;
        }
        log.info("本地文件已上传到MinIOURL: {}", uploadedUrl);
        return uploadedUrl;
    } catch (Exception e) {
        log.error("上传本地文件到MinIO失败: {}", e.getMessage(), e);
        return null;
    }
}

/**
 * Downloads and parses every transcription result of a finished task and returns
 * the extracted texts joined by single spaces (empty string when nothing was extracted).
 */
private String collectTranscriptionTextForAsr(TranscriptionResult result) {
    StringBuilder fullText = new StringBuilder();
    for (TranscriptionTaskResult taskResult : result.getResults()) {
        if (taskResult.getTranscriptionUrl() == null) {
            continue;
        }
        try {
            String rawJson = downloadTranscriptionResult(taskResult.getTranscriptionUrl());
            if (rawJson == null || rawJson.trim().isEmpty()) {
                continue;
            }
            String extractedText = parseTranscriptionResult(rawJson);
            if (extractedText == null || extractedText.trim().isEmpty()) {
                continue;
            }
            if (fullText.length() > 0) {
                fullText.append(" ");
            }
            fullText.append(extractedText);
        } catch (Exception e) {
            // Best effort: one failed result must not discard the others, but keep
            // the stack trace so the failure can be diagnosed.
            log.warn("下载transcription结果失败: {}", e.getMessage(), e);
        }
    }
    return fullText.toString().trim();
}
/**
 * Maps a file name to a MIME content type based on its extension.
 *
 * <p>The extension is the text after the last dot, compared case-insensitively.
 * Recognized audio extensions are mp3, wav, m4a, aac, ogg, flac and wma.
 *
 * @param fileName file name to inspect; may be null
 * @return the matching audio content type, or "application/octet-stream" when the
 *         name is null, has no dot, or has an unrecognized extension
 */
private String getContentType(String fileName) {
    final String fallback = "application/octet-stream";
    if (fileName == null) {
        return fallback;
    }

    final String lowered = fileName.toLowerCase();
    final int dotIndex = lowered.lastIndexOf('.');
    if (dotIndex < 0) {
        return fallback;
    }

    switch (lowered.substring(dotIndex + 1)) {
        case "mp3":
            return "audio/mpeg";
        case "wav":
            return "audio/wav";
        case "m4a":
            return "audio/mp4";
        case "aac":
            return "audio/aac";
        case "ogg":
            return "audio/ogg";
        case "flac":
            return "audio/flac";
        case "wma":
            return "audio/x-ms-wma";
        default:
            return fallback;
    }
}
@Override
public AsrResponse speechToText(AsrRequest request) {
long startTime = System.currentTimeMillis();