语音转文本提取为通用方法
This commit is contained in:
@@ -4,6 +4,7 @@ import com.alibaba.dashscope.audio.asr.transcription.*;
|
||||
import com.rj.entity.TtsRequestLog;
|
||||
import com.rj.mapper.TtsRequestLogMapper;
|
||||
import com.rj.service.ITtsRequestLogService;
|
||||
import com.rj.service.MinIOService;
|
||||
import com.rj.utils.MinIOUrlGenerator;
|
||||
import com.rj.dto.AsrRequest;
|
||||
import com.rj.dto.AsrResponse;
|
||||
@@ -12,9 +13,13 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.file.Files;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
@@ -36,6 +41,9 @@ public class TtsRequestLogServiceImpl implements ITtsRequestLogService {
|
||||
// Spring-injected MinIO URL generator; its call sites are not visible in this excerpt.
@Autowired
|
||||
private MinIOUrlGenerator urlGenerator;
|
||||
|
||||
// Spring-injected MinIO service; transcribeAudioToText uses it to upload local audio files
// so that the ASR request can reference them by URL.
@Autowired
|
||||
private MinIOService minIOService;
|
||||
|
||||
// DashScope API key, bound from the "dashscope.api.key" configuration property and passed
// to the transcription request builder.
@Value("${dashscope.api.key}")
|
||||
private String apiKey;
|
||||
|
||||
@@ -111,6 +119,175 @@ public class TtsRequestLogServiceImpl implements ITtsRequestLogService {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* 音频转文本的公共方法
|
||||
* 支持本地文件路径或URL
|
||||
*
|
||||
* @param audioPathOrUrl 音频文件路径(本地路径)或URL
|
||||
* @return 转换后的文本内容,失败时返回null
|
||||
*/
|
||||
public String transcribeAudioToText(String audioPathOrUrl) {
|
||||
return transcribeAudioToText(audioPathOrUrl, defaultAsrModel);
|
||||
}
|
||||
|
||||
/**
|
||||
* 音频转文本的公共方法
|
||||
* 支持本地文件路径或URL
|
||||
*
|
||||
* @param audioPathOrUrl 音频文件路径(本地路径)或URL
|
||||
* @param model ASR模型名称,如果为null则使用默认模型
|
||||
* @return 转换后的文本内容,失败时返回null
|
||||
*/
|
||||
public String transcribeAudioToText(String audioPathOrUrl, String model) {
|
||||
if (audioPathOrUrl == null || audioPathOrUrl.trim().isEmpty()) {
|
||||
log.error("音频文件路径或URL不能为空");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isAsrServiceAvailable()) {
|
||||
log.error("ASR服务不可用,请检查配置");
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
// 判断是URL还是本地文件路径
|
||||
String audioUrl = audioPathOrUrl;
|
||||
|
||||
// 判断是否为URL(以http://或https://开头)
|
||||
if (!audioPathOrUrl.startsWith("http://") && !audioPathOrUrl.startsWith("https://")) {
|
||||
log.info("检测到本地文件路径: {}", audioPathOrUrl);
|
||||
|
||||
// 检查文件是否存在
|
||||
File file = new File(audioPathOrUrl);
|
||||
if (!file.exists() || !file.isFile()) {
|
||||
log.error("本地文件不存在或不是文件: {}", audioPathOrUrl);
|
||||
return null;
|
||||
}
|
||||
|
||||
// 上传本地文件到MinIO获取URL
|
||||
try {
|
||||
String fileName = file.getName();
|
||||
String contentType = getContentType(fileName);
|
||||
|
||||
// 读取文件内容
|
||||
byte[] fileBytes = Files.readAllBytes(file.toPath());
|
||||
|
||||
// 创建MultipartFile对象
|
||||
MultipartFile multipartFile = new MockMultipartFile(
|
||||
"file",
|
||||
fileName,
|
||||
contentType,
|
||||
fileBytes
|
||||
);
|
||||
|
||||
// 上传到MinIO
|
||||
audioUrl = minIOService.uploadFile(multipartFile);
|
||||
log.info("本地文件已上传到MinIO,URL: {}", audioUrl);
|
||||
} catch (Exception e) {
|
||||
log.error("上传本地文件到MinIO失败: {}", e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
} else {
|
||||
log.info("使用音频URL: {}", audioUrl);
|
||||
}
|
||||
|
||||
// 使用默认模型或指定模型
|
||||
String asrModel = (model != null && !model.trim().isEmpty()) ? model : defaultAsrModel;
|
||||
|
||||
log.info("开始语音识别 - 音频URL: {}, 模型: {}", audioUrl, asrModel);
|
||||
|
||||
// 创建转写请求参数
|
||||
TranscriptionParam param = TranscriptionParam.builder()
|
||||
.apiKey(apiKey)
|
||||
.model(asrModel)
|
||||
.fileUrls(Arrays.asList(audioUrl))
|
||||
.build();
|
||||
|
||||
// 创建转写对象
|
||||
Transcription transcription = new Transcription();
|
||||
|
||||
// 提交转写请求
|
||||
TranscriptionResult result = transcription.asyncCall(param);
|
||||
String taskId = result.getTaskId();
|
||||
log.info("ASR任务已提交 - TaskId: {}", taskId);
|
||||
|
||||
// 等待任务完成
|
||||
TranscriptionQueryParam queryParam = TranscriptionQueryParam.FromTranscriptionParam(param, taskId);
|
||||
result = transcription.wait(queryParam);
|
||||
log.info("语音识别完成 - TaskId: {}", taskId);
|
||||
|
||||
if (result.getResults() != null && !result.getResults().isEmpty()) {
|
||||
// 解析识别结果
|
||||
StringBuilder fullText = new StringBuilder();
|
||||
|
||||
// 遍历所有结果,获取transcriptionUrl并下载识别文本
|
||||
for (TranscriptionTaskResult taskResult : result.getResults()) {
|
||||
if (taskResult.getTranscriptionUrl() != null) {
|
||||
try {
|
||||
// 从transcriptionUrl下载识别结果
|
||||
String transcriptionResult = downloadTranscriptionResult(taskResult.getTranscriptionUrl());
|
||||
|
||||
if (transcriptionResult != null && !transcriptionResult.trim().isEmpty()) {
|
||||
// 解析transcription结果JSON
|
||||
String extractedText = parseTranscriptionResult(transcriptionResult);
|
||||
if (extractedText != null && !extractedText.trim().isEmpty()) {
|
||||
if (fullText.length() > 0) {
|
||||
fullText.append(" ");
|
||||
}
|
||||
fullText.append(extractedText);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("下载transcription结果失败: {}", e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String resultText = fullText.toString().trim();
|
||||
if (resultText.isEmpty()) {
|
||||
log.warn("ASR识别结果为空 - TaskId: {}", taskId);
|
||||
return null;
|
||||
}
|
||||
|
||||
log.info("语音识别完成 - 识别文本长度: {}, TaskId: {}", resultText.length(), taskId);
|
||||
return resultText;
|
||||
} else {
|
||||
log.warn("ASR识别结果为空 - TaskId: {}", taskId);
|
||||
return null;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("语音识别失败: {}", e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件名获取Content-Type
|
||||
*/
|
||||
private String getContentType(String fileName) {
|
||||
if (fileName == null) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
String lowerName = fileName.toLowerCase();
|
||||
if (lowerName.endsWith(".mp3")) {
|
||||
return "audio/mpeg";
|
||||
} else if (lowerName.endsWith(".wav")) {
|
||||
return "audio/wav";
|
||||
} else if (lowerName.endsWith(".m4a")) {
|
||||
return "audio/mp4";
|
||||
} else if (lowerName.endsWith(".aac")) {
|
||||
return "audio/aac";
|
||||
} else if (lowerName.endsWith(".ogg")) {
|
||||
return "audio/ogg";
|
||||
} else if (lowerName.endsWith(".flac")) {
|
||||
return "audio/flac";
|
||||
} else if (lowerName.endsWith(".wma")) {
|
||||
return "audio/x-ms-wma";
|
||||
}
|
||||
return "application/octet-stream";
|
||||
}
|
||||
|
||||
@Override
|
||||
public AsrResponse speechToText(AsrRequest request) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
Reference in New Issue
Block a user