vue3 实现音频转文字组件
使用 recorder-core 第三方插件实现音频转纯文本的功能。
工具类文件
recorder.ts
import Recorder from 'recorder-core'
import 'recorder-core/src/engine/wav'
import 'recorder-core/src/extensions/lib.fft.js'
import 'recorder-core/src/extensions/frequency.histogram.view'
/**
 * Options accepted by `createRecorder`.
 *
 * Only `onProcess` is typed explicitly; every other option (`type`,
 * `sampleRate`, `bitRate`, ...) passes through the index signature and is
 * forwarded to recorder-core untouched.
 */
interface RecorderConfig {
  /**
   * Real-time callback invoked while recording.
   * NOTE(review): parameter order and meaning follow the recorder-core README
   * (PCM chunks, volume level, duration, sample rate, first new chunk index,
   * async-done callback) — confirm against the installed version.
   */
  onProcess?: (
    buffers: Int16Array[],
    powerLevel: number,
    bufferDuration: number,
    bufferSampleRate: number,
    newBufferIdx: number,
    asyncEnd?: () => void
  ) => void | boolean
  [keyname: string]: any
}
/**
 * Free-form option bag merged over the defaults in
 * `createRecorderWithWaveView`; any key is accepted.
 */
interface FrequencyHistogramViewConfig {
  [option: string]: any
}
// Module-level cache of the recorder built by `createRecorder`; null while no recorder exists.
let recorderInstance: any = null
// Re-export of the recorder-core `Recorder` entry point (the misspelled name is part of the public API).
export const RecorderContructor = Recorder
/**
 * Returns the shared recorder, creating it on first use.
 *
 * The instance is cached at module level: once it exists, later calls return
 * it as-is and their `config` argument is ignored. Pass `onProcess` in
 * `config` to receive real-time audio buffers (e.g. to draw a waveform).
 *
 * @param config Options merged over the defaults below.
 * @returns The cached recorder instance.
 */
export const createRecorder = (config?: RecorderConfig) => {
  if (!recorderInstance) {
    recorderInstance = Recorder({
      // Recording format; other formats can be substituted.
      type: 'wav',
      // Sample rate; higher values capture more detail.
      sampleRate: 16000,
      // Bit rate; higher values give better quality.
      bitRate: 16,
      ...config
    })
  }
  return recorderInstance
}
/**
 * Tears down the shared recorder, if one exists: closes it, clears the
 * module-level cache, then calls `Recorder.Destroy()`. Does nothing when no
 * recorder has been created.
 */
export const destoryRecorder = () => {
  const activeRecorder = recorderInstance
  if (!activeRecorder) {
    return
  }
  activeRecorder.close()
  recorderInstance = null
  Recorder.Destroy()
}
/**
 * Creates a `Recorder.FrequencyHistogramView` bound to `el`.
 *
 * @param el Element handed to the view as `elem`.
 * @param config Optional overrides; spread last, so they win over the defaults.
 * @returns Whatever `Recorder.FrequencyHistogramView` returns.
 */
export const createRecorderWithWaveView = (el: HTMLElement, config?: FrequencyHistogramViewConfig) => {
  const viewOptions = {
    elem: el,
    lineCount: 30,
    position: 0,
    minHeight: 1,
    fallDuration: 400,
    stripeEnable: false,
    mirrorEnable: true,
    linear: [0, '#fff', 1, '#fff'],
    ...config
  }
  return Recorder.FrequencyHistogramView(viewOptions)
}
组件文件
AudioInput.vue
<template>
  <div v-if="visibleModal" class="custom-Modal-container">
    <Teleport to="body">
      <div class="modal_box" ref="modalRef">
        <!-- Clicking the backdrop dismisses the dialog -->
        <div class="modal_mask" @click.stop="closeModal"></div>
        <div class="modal_content">
          <div class="audio_box">
            <div class="audio_header">
              <span class="audio_title_text">
                <span v-if="audioStatus === 'input'">收音中</span>
                <span v-else-if="audioStatus === 'transform' || audioStatus === 'end'">识别中</span>
                <span v-else-if="audioStatus === 'unknown'">停止收音</span>...
              </span>
              <!-- The close icon previously had no handler and did nothing when clicked -->
              <svg-icon class="close_icon" iconFileName="关闭" @click="closeModal" />
            </div>
            <div class="audio_content">
              <div class="input_content_box">
                <div class="input_content">
                  <span v-if="audioStatus === 'input'">请说,我在聆听…</span>
                  <span v-else-if="audioStatus === 'transform' || audioStatus === 'end'">
                    {{ audioContentText }}
                  </span>
                  <span class="unknow_tip_text" v-else-if="audioStatus === 'unknown'">未能识别,请点击图标重试</span>
                </div>
                <div v-if="audioStatus === 'input'" class="input_tip_text">您可以说出您需要搜索的内容关键词</div>
                <div v-if="audioStatus === 'unknown'" class="input_tip_text">说出您需要搜索的内容关键词</div>
              </div>
              <!-- One icon per status; each status binds the icon to a different action -->
              <div class="audio_icon_box" :class="audioStatus">
                <i v-if="audioStatus === 'unknown'" class="img_box input_audio" @click="reStartRecorderHandle"></i>
                <i v-if="audioStatus === 'end'" class="img_box input_audio" @click="confirmSearchHandle"></i>
                <i v-if="audioStatus === 'input'" class="img_box input_audio" @click="finishRecorderHandle"></i>
                <i v-if="audioStatus === 'transform'" class="img_box input_audio" @click="closeModal"></i>
              </div>
              <!-- Mount point handed to createRecorderWithWaveView for the waveform -->
              <div ref="recorderWaveRef" class=""></div>
            </div>
          </div>
        </div>
      </div>
    </Teleport>
  </div>
</template>
<script setup lang="ts">
import { onMounted, onUnmounted, ref, watch } from 'vue'
import { v4 as uuidv4 } from 'uuid'
// 语音输入工具
import { createRecorder, createRecorderWithWaveView, destoryRecorder } from './recorder'
// api
import { getVoiceToText } from '@/services/common'
// type interface
/** Status of the voice-input dialog; drives which texts and icon action the template shows. */
type AudioInputStatus = 'ready' | 'input' | 'transform' | 'end' | 'unknown'
// v-model binding that controls whether the dialog is shown.
const visibleModal = defineModel<boolean>()
// 'close' is emitted when the dialog is dismissed, 'complete' with the recognized text.
const emit = defineEmits(['close', 'complete'])
const audioStatus = ref<AudioInputStatus>('ready')
const modalRef = ref<HTMLElement | null>(null)
// Holds the recorded Blob after a successful stop and '' otherwise, so the
// type must admit both (it was declared as string only).
const audioContentBlobData = ref<Blob | string>('')
const audioContentText = ref<string>('')
// recorder
const recorderIntance = ref<any>(null)
const recorderWaveInstance = ref<any>(null)
// Element the waveform view is mounted into.
const recorderWaveRef = ref<HTMLElement | null>(null)
// Busy flag used by the click handlers to ignore re-entrant clicks.
const isLoadingRecorder = ref<boolean>(false)
/** *************** method ************** **/
/**
 * Obtains the shared recorder and stores it in `recorderIntance`.
 *
 * The `onProcess` callback forwards the newest buffer to the waveform view
 * when one has been created. The original comment notes it fires roughly 12
 * times per second and can be used for live drawing or uploading.
 */
const initRecorder = () => {
  recorderIntance.value = createRecorder({
    // Trailing callback arguments that were never read are omitted; the
    // unused third one is kept (underscore-prefixed) only to preserve positions.
    onProcess: (buffers: any[], powerLevel: number, _bufferDuration: number, bufferSampleRate: number) => {
      recorderWaveInstance.value?.input(buffers[buffers.length - 1], powerLevel, bufferSampleRate)
    }
  })
}
// 开始录音
/**
 * Starts a new recording session.
 *
 * Resets the previous result, opens the recorder, creates the waveform view
 * (best effort) and starts recording. Any failure — including the user
 * denying microphone access — is handled here: the status becomes 'unknown'
 * and the busy flag is always cleared, so this function never rejects.
 */
const startRecorder = async () => {
  audioStatus.value = 'input'
  audioContentBlobData.value = ''
  audioContentText.value = ''
  isLoadingRecorder.value = true
  try {
    // Wrap the callback-style open() so a failure lands in the catch below
    // instead of escaping as an unhandled rejection.
    await new Promise<void>((resolve, reject) => {
      recorderIntance.value.open(
        () => {
          console.log('录音已打开')
          resolve()
        },
        (msg: string, isUserNotAllow: boolean) => {
          console.error('打开录音出错:' + msg, 'isUserNotAllow: ', isUserNotAllow)
          reject(new Error(msg))
        }
      )
    })
    // The waveform is decorative: a failure here must not abort the recording.
    try {
      if (recorderWaveRef.value) {
        recorderWaveInstance.value = createRecorderWithWaveView(recorderWaveRef.value)
      }
    } catch (err) {
      console.error('音频可视化图形绘制出错', err)
    }
    console.log('尝试录音打开')
    await recorderIntance.value.start()
    console.log('录音已打开')
  } catch (err) {
    console.error('打开录音出错', err)
    audioStatus.value = 'unknown'
  } finally {
    isLoadingRecorder.value = false
  }
}
// 结束录音
/**
 * Stops the current recording and stores the resulting blob.
 *
 * Sets the status to 'transform' first. On success the recorder is closed and
 * the blob is written to `audioContentBlobData`. On failure the recorder is
 * closed inside the error callback and the status is set back to 'input'.
 * The error is swallowed, so this function never rejects; callers detect a
 * failure by `audioContentBlobData` still being empty.
 */
const stopRecorderHandle = async () => {
  audioStatus.value = 'transform'
  try {
    console.log('尝试终止录音')
    // Typed explicitly so the destructuring below is not performed on `unknown`.
    // `blob` stays `any` because it is assigned to a ref declared elsewhere.
    const { blob, duration } = await new Promise<{ blob: any; duration: number }>((resolve, reject) => {
      recorderIntance.value.stop(
        (recordedBlob: any, recordedDuration: number) => {
          resolve({ blob: recordedBlob, duration: recordedDuration })
        },
        (err: string) => {
          console.error('终止录音出错:' + err)
          recorderIntance.value.close()
          reject(new Error(String(err)))
        }
      )
    })
    // The blob could be previewed or downloaded locally via URL.createObjectURL(blob).
    console.log('时长:' + duration + 'ms')
    await recorderIntance.value.close()
    audioContentBlobData.value = blob
  } catch {
    audioStatus.value = 'input'
  }
}
// 重置输入
/**
 * Restarts recording; bound to the icon while the status is 'unknown'.
 *
 * Ignored while another recorder operation is in flight. The busy flag is now
 * raised for the whole stop/start sequence (it was being set to `false`,
 * which was a no-op right after the guard) and always cleared afterwards.
 */
const reStartRecorderHandle = async () => {
  if (isLoadingRecorder.value) return
  isLoadingRecorder.value = true
  try {
    // A failing stop must not prevent the restart.
    await stopRecorderHandle().catch(err => err)
    await startRecorder()
  } catch (err) {
    console.error('重新录音出错', err)
    audioStatus.value = 'unknown'
  } finally {
    isLoadingRecorder.value = false
  }
}
// 完成录音
/**
 * Finishes the recording and converts it to text; bound to the icon while
 * the status is 'input'.
 *
 * Ends in 'end' only when a non-empty transcript was obtained. A failed stop
 * (no audio captured), a failed request or an empty transcript all end in
 * 'unknown', which makes the template offer a retry.
 */
const finishRecorderHandle = async () => {
  if (isLoadingRecorder.value) return
  isLoadingRecorder.value = true
  try {
    await stopRecorderHandle()
    // stopRecorderHandle swallows its own errors; an empty blob means the
    // stop failed, so there is nothing worth uploading.
    if (!audioContentBlobData.value) {
      throw new Error('未获取到录音数据')
    }
    // Fetch the transcript for the recorded audio.
    await fetchVoiceToText()
    audioStatus.value = audioContentText.value ? 'end' : 'unknown'
  } catch (err) {
    console.error('语音识别失败', err)
    audioStatus.value = 'unknown'
  } finally {
    isLoadingRecorder.value = false
  }
}
// 把录音转成文本
/**
 * Uploads the recorded audio and stores the recognized text in
 * `audioContentText`.
 *
 * @throws Error when the service answers with a code other than 200, so the
 *   caller can show the retry state instead of an empty result.
 */
const fetchVoiceToText = async () => {
  const formData = new FormData()
  formData.append('voice_data', audioContentBlobData.value)
  // FormData values are strings or Blobs; the numbers are sent as the same
  // text they were previously coerced to.
  // NOTE(review): the meaning of seq / end / voice_format (12) is defined by
  // the backend API and is not visible here — confirm against its docs.
  formData.append('seq', '0')
  formData.append('end', '1')
  formData.append('voice_id', uuidv4())
  formData.append('voice_format', '12')
  const { code, data } = await getVoiceToText(formData)
  if (code !== 200) {
    throw new Error('语音转文本失败, code: ' + code)
  }
  console.log(data)
  // Tolerate a missing payload or text field rather than storing undefined.
  audioContentText.value = data?.text ?? ''
}
/**
 * Confirms the recognized text; bound to the icon while the status is 'end'.
 * Emits 'complete' with the text, then hides the dialog and resets its state.
 */
const confirmSearchHandle = () => {
  const recognizedText = audioContentText.value
  emit('complete', recognizedText)
  // Reset to the idle state.
  audioContentText.value = ''
  audioContentBlobData.value = ''
  visibleModal.value = false
  audioStatus.value = 'ready'
}
/**
 * Dismisses the dialog: stops any recording first, then resets the state,
 * hides the dialog and emits 'close'. Also exposed to the parent component.
 */
const closeModal = async () => {
  await stopRecorderHandle()
  // Reset to the idle state.
  audioContentText.value = ''
  audioContentBlobData.value = ''
  visibleModal.value = false
  audioStatus.value = 'ready'
  emit('close')
}
/** ***************** watch ************** **/
// Start recording whenever the dialog becomes visible; hiding it triggers nothing here.
watch(visibleModal, async isVisible => {
  if (!isVisible) {
    return
  }
  await startRecorder()
})
/** ******************* life cycle ******************* **/
// Obtain the shared recorder once the component is mounted.
onMounted(initRecorder)
// Drop the local reference first, then tear down the shared recorder.
onUnmounted(() => {
  recorderIntance.value = null
  destoryRecorder()
})
/** ************** component expose *********** **/
// Public surface for parents holding a template ref to this component.
defineExpose({
  visibleModal,
  closeModal
})
</script>
<style lang="scss" scoped>
// Fixed container pinned to all four viewport edges, stacked at z-index 2000.
.modal_box {
position: fixed;
width: 100%;
left: 0;
top: 0;
right: 0;
bottom: 0;
z-index: 2000;
}
// Half-transparent black backdrop filling its positioned parent.
.modal_mask {
position: absolute;
left: 0;
bottom: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.5);
}
// Dialog panel anchored 32px above the bottom edge, layered above the mask (z-index 1).
// NOTE(review): $color-text, $color-danger and $color-text-light-3 are not defined
// in this block — presumably injected as global SCSS variables; confirm in the build config.
.modal_content {
position: absolute;
bottom: 32px;
z-index: 1;
left: 0;
right: 0;
padding: 0 12px;
box-sizing: border-box;
// Apply border-box sizing to every descendant.
* {
box-sizing: border-box;
}
// Card with the gradient background; the header sits directly on the gradient.
.audio_box {
border-radius: 10px;
position: relative;
width: auto;
background-image: linear-gradient(91deg, #7d79ff 9%, #43e1ff 93%);
// Title row: status text on the left, close icon on the right.
.audio_header {
height: 44px;
border-radius: 10px 10px 0 0;
color: #ffffff;
font-size: 14px;
font-weight: bold;
display: flex;
align-items: center;
justify-content: space-between;
padding-left: 22px;
padding-right: 18px;
}
.close_icon {
width: 12px;
height: 12px;
}
}
// White body area below the header.
.audio_content {
height: 248px;
padding: 24px;
border-radius: 10px;
background-color: #ffffff;
}
// Fixed-height text area; scrolls vertically when the content overflows.
.input_content_box {
height: 64px;
text-align: center;
overflow-y: auto;
}
// Main prompt / recognized text.
.input_content {
text-align: center;
font-size: 16px;
color: $color-text;
font-weight: bold;
line-height: 22px;
}
// Error colour for the "not recognized" message.
.unknow_tip_text {
color: $color-danger;
}
// Secondary hint line under the main text.
.input_tip_text {
margin-top: 10px;
font-size: 12px;
line-height: 17px;
color: $color-text-light-3;
text-align: center;
}
// Centers the 138x138 action icon.
.audio_icon_box {
display: flex;
align-items: center;
justify-content: center;
.img_box {
width: 138px;
height: 138px;
display: block;
&.input_audio {
background: url('@/assets/images/audio_input_icon.png') no-repeat center center;
}
}
}
}
</style>