当前位置: 首页 > article >正文

音频采集(VUE3+JAVA)

vue部分代码

xx.vue

import Recorder from './Recorder.js';
export default {
    data() {
        return {
            mediaStream: null,      // microphone MediaStream from getUserMedia
            recorder: null,         // active Recorder instance while recording
            isRecording: false,
            audioChunks: [],        // NOTE(review): unused here; chunks are buffered inside Recorder
            vadInterval: null       // requestAnimationFrame handle of the VAD loop (for cleanup)
        };
    },
    async mounted() {
        // NOTE(review): getUserMedia rejects when the user denies microphone
        // access; consider a try/catch with user-visible feedback.
        this.mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
        this.startVAD();
    },
    // FIX: Vue 3 renamed this hook — `beforeDestroy` is never invoked in
    // Vue 3, so the animation-frame loop (and the microphone) leaked on
    // every unmount.
    beforeUnmount() {
        if (this.vadInterval) {
            cancelAnimationFrame(this.vadInterval);
        }
        // Also release the microphone when the component goes away.
        if (this.mediaStream) {
            this.mediaStream.getTracks().forEach((track) => track.stop());
        }
    },
    created() {
        // NOTE(review): `defaultLogin` is not defined in this component's
        // `methods` — presumably provided by a mixin; confirm, otherwise
        // this throws on creation.
        this.defaultLogin();
    },
    methods: {
        /**
         * Voice-activity detection: sample the frequency spectrum on each
         * animation frame, start recording when the average level exceeds 30,
         * and stop once it stays below 10 for ~500 ms.
         */
        startVAD() {
            const audioContext = new (window.AudioContext || window.webkitAudioContext)();
            const source = audioContext.createMediaStreamSource(this.mediaStream);
            const analyser = audioContext.createAnalyser();
            source.connect(analyser);
            analyser.fftSize = 2048;
            const bufferLength = analyser.frequencyBinCount;
            const dataArray = new Uint8Array(bufferLength);
            // Average magnitude across all frequency bins (0-255 scale).
            const averageLevel = () => {
                analyser.getByteFrequencyData(dataArray);
                let sum = 0;
                for (let i = 0; i < bufferLength; i++) {
                    sum += dataArray[i];
                }
                return sum / bufferLength;
            };
            const checkVoiceActivity = () => {
                const average = averageLevel();
                if (average > 30 && !this.isRecording) {
                    this.startRecording();
                } else if (average < 10 && this.isRecording) {
                    // Re-check after 500 ms so a brief pause does not cut the clip.
                    setTimeout(() => {
                        // FIX: also verify we are still recording — recording
                        // may already have stopped during the 500 ms wait.
                        if (this.isRecording && averageLevel() < 10) {
                            this.stopRecording();
                        }
                    }, 500);
                }
                this.vadInterval = requestAnimationFrame(checkVoiceActivity); // keep handle for cleanup
            };
            // FIX: store the very first frame handle too, so unmounting before
            // the first callback still cancels the loop.
            this.vadInterval = requestAnimationFrame(checkVoiceActivity);
        },
        startRecording() {
            this.recorder = new Recorder(this.mediaStream);
            this.recorder.record();
            this.isRecording = true;
            console.log('开始录制');
        },
        stopRecording() {
            if (this.recorder && this.isRecording) {
                this.recorder.stopAndExport((blob) => {
                    const formData = new FormData();
                    // NOTE(review): the blob is MediaRecorder output (webm/opus),
                    // not WAV — hence the .opus name. The FormData is built but
                    // never sent; the upload request appears to be missing here.
                    formData.append('audioFile', blob, 'recorded-audio.opus');
                });
                this.isRecording = false;
                console.log('停止录制');
            }
        }
    }
};

Recorder.js

class Recorder {
    /**
     * Thin wrapper around MediaRecorder: buffers output chunks and exports
     * them as a single Blob.
     * @param {MediaStream} stream - audio stream, e.g. from getUserMedia
     * @throws {Error} when an AudioContext cannot be created
     */
    constructor(stream) {
        const AudioContext = window.AudioContext || window.webkitAudioContext;

        try {
            // NOTE(review): this context is never used by the class and is
            // never closed; kept only for interface compatibility — consider
            // removing it or calling audioContext.close() on teardown.
            this.audioContext = new AudioContext();
        } catch (error) {
            console.error('创建 AudioContext 失败:', error);
            throw new Error('无法创建音频上下文,录音功能无法使用');
        }

        this.stream = stream;
        this.mediaRecorder = new MediaRecorder(stream);
        this.audioChunks = [];

        this.mediaRecorder.addEventListener('dataavailable', (event) => {
            if (event.data.size > 0) {
                this.audioChunks.push(event.data);
            }
        });

        this.mediaRecorder.addEventListener('stop', () => {
            console.log('录音停止,开始导出音频');
        });
    }

    /** Start capturing; chunks arrive via the 'dataavailable' listener. */
    record() {
        try {
            this.mediaRecorder.start();
        } catch (error) {
            console.error('开始录音失败:', error);
            throw new Error('无法开始录音');
        }
    }

    /** Stop capturing; fires the recorder's 'stop' event. */
    stop() {
        try {
            this.mediaRecorder.stop();
        } catch (error) {
            console.error('停止录音失败:', error);
            throw new Error('无法停止录音');
        }
    }

    /**
     * Export the buffered chunks as one Blob, then reset the buffer.
     *
     * FIX: the Blob is now tagged with the recorder's actual container type
     * (typically audio/webm or audio/ogg carrying Opus) instead of always
     * claiming 'audio/wav' — MediaRecorder does not produce WAV, and the
     * false label is what made downstream tooling reject the files.
     * FIX: removed the dead FileReader round-trip whose result was discarded.
     *
     * @param {(blob: Blob) => void} callback - receives the exported Blob
     */
    exportWAV(callback) {
        try {
            const mimeType = this.mediaRecorder?.mimeType || 'audio/wav';
            const blob = new Blob(this.audioChunks, { type: mimeType });
            console.log('生成的 Blob 的 MIME 类型:', blob.type);
            callback(blob);
            this.audioChunks = [];
        } catch (error) {
            console.error('导出 WAV 格式失败:', error);
            throw new Error('无法导出 WAV 格式的音频');
        }
    }

    /**
     * Stop recording and deliver the result through `callback`.
     *
     * FIX: the 'stop' listener is registered with { once: true } — previously
     * every call stacked another permanent listener, so a reused Recorder
     * re-invoked all earlier callbacks on each subsequent stop.
     */
    stopAndExport(callback) {
        this.mediaRecorder.addEventListener('stop', () => {
            this.exportWAV(callback);
        }, { once: true });
        this.stop();
    }
}

export default Recorder;

JAVA部分

VoiceInputServiceImpl.java

package com.medical.asr.service.impl;

import com.medical.asr.service.VoiceInputService;
import com.medical.common.props.FileProps;
import lombok.extern.slf4j.Slf4j;
import org.springblade.core.tool.utils.StringPool;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

@Service
@Transactional(rollbackFor = Exception.class)
@Slf4j
public class VoiceInputServiceImpl implements VoiceInputService {

    @Autowired
    private FileProps fileProps;

    /**
     * 接收音频文件,并保存在目录下
     * @param audioFile 音频文件
     * @return 文件路径
     */
    private String receiveAudio(MultipartFile audioFile) {
        if (audioFile == null || audioFile.isEmpty()) {
            log.info("未收到音频文件");
            return StringPool.EMPTY;
        }
        try {
            //文件存放的地址
            String uploadDir = fileProps.getUploadPath();
            System.out.println(uploadDir);
            File dir = new File(uploadDir);
            if (!dir.exists()) {
                dir.mkdirs();
            }
            String fileName = System.currentTimeMillis() + "-" + audioFile.getOriginalFilename();
            Path filePath = Paths.get(uploadDir, fileName);
            Files.write(filePath, audioFile.getBytes());
            log.info("Received audio file: " + fileName);
            log.info("音频接收成功");
            return filePath.toString();
        } catch (IOException e) {
            log.error("保存音频文件时出错: " + e.getMessage());
            return StringPool.EMPTY;
        }
    }

}

但是出了一个问题:这样生成的音频文件,用 ffmpeg 检查后发现格式有问题——直接播放没问题,但要做后续加工时,它并不是合规的 WAV 文件。

人家需要满足这样的条件:(注:原文此处为对方要求的音频参数截图,抓取时丢失)

而我们这边出来的音频文件是这样的:(注:原文此处为实际文件的 ffprobe 检测结果截图,抓取时丢失)

sample_rate(采样率), bits_per_sample(每个采样点所使用的位数 / 位深度), codec_name(编解码器名称), codec_long_name(编解码器完整名称 / 详细描述)这几项都不满足。于是查找opus转pcm的方案,修改之前的代码,新代码为:

/**
 * Receives an uploaded audio file, stores it under the configured upload
 * directory, and converts it to 16 kHz mono 16-bit PCM WAV via ffmpeg.
 *
 * @param audioFile uploaded audio file (may be null or empty)
 * @return path of the converted WAV file, or an empty string on failure
 */
private String receiveAudio(MultipartFile audioFile) {
        if (audioFile == null || audioFile.isEmpty()) {
            log.info("未收到音频文件");
            return StringPool.EMPTY;
        }
        try {
            String uploadDir = fileProps.getUploadPath();
            // FIX: createDirectories reports failure via IOException, unlike
            // the ignored boolean of File#mkdirs; the stray System.out debug
            // print is gone as well.
            Files.createDirectories(Paths.get(uploadDir));
            // FIX: strip path components from the untrusted client filename
            // to prevent path traversal ("../../...").
            String originalName = audioFile.getOriginalFilename();
            String safeName = originalName == null ? "audio"
                    : originalName.substring(Math.max(originalName.lastIndexOf('/'),
                            originalName.lastIndexOf('\\')) + 1);
            String fileName = System.currentTimeMillis() + "-" + safeName;
            Path filePath = Paths.get(uploadDir, fileName);
            // FIX: stream to disk instead of buffering the file via getBytes().
            Files.copy(audioFile.getInputStream(), filePath);
            log.info("Received audio file: " + fileName);
            log.info("音频接收成功");
            // Drop the extension before naming the converted output file.
            int dotIndex = fileName.lastIndexOf('.');
            if (dotIndex != -1) {
                fileName = fileName.substring(0, dotIndex);
            }
            String outputPath = Paths.get(uploadDir, "converted_" + fileName + ".wav").toString();
            // Convert sample rate / channels / codec (see convertAudio).
            // NOTE(review): the original upload is left on disk; delete it
            // here if only the converted WAV is needed.
            convertAudio(filePath.toString(), outputPath);
            return outputPath;
        } catch (IOException e) {
            // FIX: pass the exception so the stack trace is logged.
            log.error("保存音频文件时出错: " + e.getMessage(), e);
            return StringPool.EMPTY;
        }
    }

    /**
     * Converts the given audio file to 16 kHz, mono, 16-bit PCM WAV by
     * invoking the external ffmpeg binary (must be installed and on PATH).
     *
     * @param inputPath  source audio file (e.g. the uploaded opus/webm)
     * @param outputPath destination WAV file
     */
    public static void convertAudio(String inputPath, String outputPath) {
        // FIX: build the command as an argument list rather than one
        // concatenated string — paths containing spaces (or other special
        // characters in a client-derived filename) broke the old
        // Runtime.exec(String) form.
        ProcessBuilder builder = new ProcessBuilder(
                "ffmpeg", "-y",          // -y: overwrite output, never prompt (a prompt would hang)
                "-i", inputPath,
                "-ar", "16000",
                "-ac", "1",
                "-acodec", "pcm_s16le",
                outputPath);
        // FIX: merge stderr into stdout. ffmpeg logs to stderr, and draining
        // the two pipes one after the other (as before) can deadlock once the
        // unread pipe's OS buffer fills up.
        builder.redirectErrorStream(true);
        try {
            Process process = builder.start();
            // try-with-resources closes the reader even on failure.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
            }
            // FIX: surface conversion failures instead of ignoring waitFor().
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                log.error("转换音频时出错: ffmpeg exit code " + exitCode);
            }
        } catch (IOException e) {
            log.error("转换音频时出错: " + e.getMessage());
            e.printStackTrace();
        } catch (InterruptedException e) {
            // FIX: restore the interrupt flag so callers can observe it.
            Thread.currentThread().interrupt();
            log.error("转换音频时出错: " + e.getMessage());
        }
    }

这里面使用了ffmpeg的命令,如果当前环境没有ffmpeg,要记得先去下载安装ffmpeg,然后配置环境变量后再使用。


http://www.kler.cn/a/552551.html

相关文章:

  • 对称加密算法——IDEA加密算法
  • 【第12章:深度学习与伦理、隐私—12.2 数据隐私保护与差分隐私技术的实现与应用】
  • 前x-ai首席科学家karpathy的从零构建ChatGPT视频学习笔记--8000字长图文笔记预警(手打纯干货,通俗易懂)
  • Linux 磁盘挂载教程
  • 计算机毕业设计--基于深度学习技术(Yolov11、v8、v7、v5)算法的高效人脸检测模型设计与实现(含Github代码+Web端在线体验界面)
  • 超全Deepseek资料包,deepseek下载安装部署提示词及本地部署指南介绍
  • IO、NIO解读和不同点,以及常用的文件流操作方法
  • 在 Vue 3 中使用 Lottie 动画:实现一个加载动画
  • [数据结构]复杂度详解
  • 人工智能技术-基于长短期记忆(LSTM)网络在交通流量预测中的应用
  • 【kafka系列】broker
  • 【C语言】第三期——判断语句
  • 文件操作(PHP)(小迪网络安全笔记~
  • 【模板】图论 最短路 (Floyd+SPFA+Dijkstra)
  • JAVA的Servlet一些知识(学习自用)
  • 【kafka系列】如何选择消息语义?
  • oracle获取当月1号
  • 正式页面开发-登录注册页面
  • 从零开始构建一个小型字符级语言模型的详细教程(基于Transformer架构)之一数据准备
  • [实现Rpc] 通信类抽象层 | function | using | 解耦合设计思想