Python 实现麦克风实时传流语音听写
语音听写,顾名思义,就是把我们说的话转成文字。但是讯飞官网提供的示例是把音频文件转成文字;那么,如何通过 Python 调用麦克风,把我们对着电脑实时说的话转成文字呢?这就需要采用麦克风传音频流的方式。这样一来,我们就可以实现指令识别、发音转文字等功能,并结合实际业务使用。
# This is a sample Python script.
# 变量
import base64
import datetime
import hashlib
import hmac
import json
import ssl
import threading
import time
from builtins import str
from datetime import datetime
from time import mktime
from urllib.parse import urlencode
from wsgiref.handlers import format_date_time
import pyaudio
import websocket
# Base WebSocket endpoint of the iFlytek (xfyun) dictation service;
# product_url() appends the signed query string to this URL.
host_url = "wss://ws-api.xfyun.cn/v2/iat"
# Account credentials; these are empty placeholders and must be filled in
# before the script can authenticate.
appid = "" # obtained from the console
api_secret = ""
api_key = ""
# Path to a PCM audio file. NOTE(review): not referenced by any code visible
# in this file.
audio_file = "./1.pcm"
# Set to False by on_message() once a message with status == 2 arrives.
# NOTE(review): nothing visible in this file reads it.
send_flag = True
def product_url(api_secret, api_key, url=None):
    """Build the signed WebSocket URL used to open the dictation session.

    The signature is an HMAC-SHA256 over the ``host``, ``date`` and request
    line, keyed with ``api_secret``.  The resulting authorization string,
    the date and the host are appended to the endpoint as query parameters.

    Args:
        api_secret: Secret used as the HMAC key.
        api_key: Key embedded in the authorization string.
        url: Endpoint to sign.  Defaults to the module-level ``host_url``.
            The host and request path that get signed are derived from this
            URL so they can never drift apart from the URL actually dialled.

    Returns:
        The endpoint URL followed by ``?`` and the URL-encoded
        ``authorization``, ``date`` and ``host`` parameters.
    """
    # Local import: only this function needs it.
    from urllib.parse import urlsplit

    if url is None:
        url = host_url
    parts = urlsplit(url)
    host = parts.netloc
    path = parts.path or "/"

    # HTTP-date (GMT) for the current time.
    now_date = format_date_time(mktime(datetime.now().timetuple()))

    # Text that gets signed: host line, date line, request line.
    signature_origin = f"host: {host}\ndate: {now_date}\nGET {path} HTTP/1.1"
    digest = hmac.new(
        api_secret.encode("utf-8"),
        signature_origin.encode("utf-8"),
        digestmod=hashlib.sha256,
    ).digest()
    signature = base64.b64encode(digest).decode("utf-8")

    # Deliberately not printed: the signature and the authorization string
    # are credential material and should not end up in console output/logs.
    authorization_origin = (
        f'api_key="{api_key}", algorithm="hmac-sha256", '
        f'headers="host date request-line", signature="{signature}"'
    )
    authorization = base64.b64encode(
        authorization_origin.encode("utf-8")
    ).decode("utf-8")

    # Authentication parameters sent as the query string.
    query = {
        "authorization": authorization,
        "date": now_date,
        "host": host,
    }
    return url + "?" + urlencode(query)
def on_message(ws, message):
    """Handle one JSON message from the server and print the recognised text.

    The message is parsed once.  The words found under
    ``data.result.ws[*].cw[*].w`` are concatenated and printed together with
    the ``pgs`` marker.  When ``data.status`` is 2 the module-level
    ``send_flag`` is cleared and the connection is closed.

    Messages carrying a non-zero ``code`` are treated as error frames: the
    code and message are printed and the connection is closed, instead of
    failing on the missing ``data``/``result`` fields.

    Args:
        ws: The WebSocket connection; only ``ws.close()`` is used.
        message: JSON text received from the server.
    """
    global send_flag

    print(f"Received message: {message}")
    payload = json.loads(message)

    # NOTE(review): assumes a non-zero top-level "code" signals a failed
    # request, per the service's response format -- confirm against the API docs.
    code = payload.get("code", 0)
    if code != 0:
        print(f"Error: code={code}, message={payload.get('message')}")
        send_flag = False
        ws.close()
        return

    data = payload.get("data") or {}
    result = data.get("result") or {}
    # "pgs" may be absent; fall back to None rather than raising KeyError.
    mark = result.get("pgs")
    res = "".join(
        word["w"]
        for segment in result.get("ws", [])
        for word in segment["cw"]
    )
    print(f"{res} ---标志:{mark}")

    if data.get("status") == 2:
        send_flag = False
        ws.close()
def on_error(ws, error):
    """Print an error reported for the WebSocket connection.

    Args:
        ws: The WebSocket connection the error belongs to.
        error: The error object or message to report.
    """
    print(f"Error: {error},{ws}")
def on_close(ws, reason, res):
    """Print a notice that the WebSocket connection has been closed.

    Args:
        ws: The WebSocket connection that was closed.
        reason: Second callback argument; not used by this handler.
        res: Third callback argument; not used by this handler.
    """
    print(f"WebSocket connection closed,{ws}")
def on_open(ws):
    """Send the first frame once the WebSocket connection is established.

    The first frame carries the application id (module-level ``appid``), the
    recognition settings and a ``data`` section with ``status`` 0 and an
    empty ``audio`` payload.

    NOTE(review): nothing in the code visible in this file sends any further
    audio frames after this one.

    Args:
        ws: The freshly opened WebSocket connection.
    """
    print(f"WebSocket connection opened,{ws},ws连接建立成功...")

    first_frame = {
        "common": {
            "app_id": appid,
        },
        "business": {
            "language": "zh_cn",
            "domain": "iat",
            "accent": "mandarin",
            "dwa": "wpgs",
        },
        "data": {
            "status": 0,
            "format": "audio/L16;rate=16000",
            "encoding": "raw",
            "audio": "",
        },
    }
    # Send the first frame.
    ws.send(json.dumps(first_frame))
def close_connection(ws):
    """Announce and then close the given WebSocket connection.

    Args:
        ws: The WebSocket connection to close.
    """
    print("Closing WebSocket connection...")
    ws.close()
# Script entry point: open the signed WebSocket session, run it until the
# connection closes, then report the elapsed wall-clock time.
if __name__ == '__main__':
    start_time = datetime.now()
    websocket.enableTrace(False)
    ws_url = product_url(api_secret, api_key)
    ws_entity = websocket.WebSocketApp(
        ws_url,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
        on_open=on_open,
    )
    # Verify the server certificate.  The signed URL carries the credentials,
    # so the connection must not be opened with verification disabled
    # (previously ssl.CERT_NONE), which would allow interception.
    ws_entity.run_forever(sslopt={"cert_reqs": ssl.CERT_REQUIRED})
    end_time = datetime.now()
    print(f"听写耗时: {end_time - start_time}")