add xunfei voicemaster
@@ -46,4 +46,8 @@ def create_voice(voice_type): | |||
from voice.edge.edge_voice import EdgeVoice | |||
return EdgeVoice() | |||
elif voice_type == "xunfei": | |||
from voice.xunfei.xunfei_voice import XunfeiVoice | |||
return XunfeiVoice() | |||
raise RuntimeError |
@@ -0,0 +1,7 @@ | |||
{ | |||
"APPID":"xxx71xxx", #讯飞xfyun.cn控制台中应用的ID | |||
"APIKey":"xxxx69058exxxxxx", #讯飞xfyun.cn控制台语音合成或者听写界面的APIKey | |||
"APISecret":"xxxx697f0xxxxxx", #讯飞xfyun.cn控制台语音合成或者听写界面的APISecret | |||
"BusinessArgsTTS":{"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"}, #语音合成的参数,具体可以参考xfyun.cn的文档 | |||
"BusinessArgsASR":{"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vad_eos":10000, "dwa": "wpgs"} #语音听写的参数,具体可以参考xfyun.cn的文档 | |||
} |
@@ -0,0 +1,209 @@ | |||
# -*- coding:utf-8 -*- | |||
# | |||
# Author: njnuko | |||
# Email: njnuko@163.com | |||
# | |||
# 这个文档是基于官方的demo来改的,具体官方demo文档请参考官网 | |||
# | |||
# 语音听写流式 WebAPI 接口调用示例 接口文档(必看):https://doc.xfyun.cn/rest_api/语音听写(流式版).html | |||
# webapi 听写服务参考帖子(必看):http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra= | |||
# 语音听写流式WebAPI 服务,热词使用方式:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--个性化热词, | |||
# 设置热词 | |||
# 注意:热词只能在识别的时候会增加热词的识别权重,需要注意的是增加相应词条的识别率,但并不是绝对的,具体效果以您测试为准。 | |||
# 语音听写流式WebAPI 服务,方言试用方法:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--识别语种列表 | |||
# 可添加语种或方言,添加后会显示该方言的参数值 | |||
# 错误码链接:https://www.xfyun.cn/document/error-code (code返回错误码时必看) | |||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |||
import websocket | |||
import datetime | |||
import hashlib | |||
import base64 | |||
import hmac | |||
import json | |||
from urllib.parse import urlencode | |||
import time | |||
import ssl | |||
from wsgiref.handlers import format_date_time | |||
from datetime import datetime | |||
from time import mktime | |||
import _thread as thread | |||
import os | |||
import wave | |||
# Values of the "status" field that marks each audio frame sent to the service.
STATUS_FIRST_FRAME = 0  # first frame of the audio stream
STATUS_CONTINUE_FRAME = 1  # intermediate frame
STATUS_LAST_FRAME = 2  # final frame

# Module-level state shared between xunfei_asr() and the websocket callbacks.
# A bare ``global name`` statement at module scope is a no-op and leaves the
# name undefined, so both names are bound explicitly here.

# Recognised text keyed by result sequence number; results that get revised
# are popped and the pieces are merged at the end of a request.
whole_dict = {}
# Ws_Param of the request in progress; read by the on_open callback.
wsParam = None
class Ws_Param(object):
    """Credentials and request arguments for one streaming dictation call."""

    def __init__(self, APPID, APIKey, APISecret, BusinessArgs, AudioFile):
        """Store the credentials, the business arguments and the audio path."""
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.AudioFile = AudioFile
        self.BusinessArgs = BusinessArgs
        # "common" section of the first frame.
        self.CommonArgs = {"app_id": self.APPID}

    def create_url(self):
        """Return the websocket URL with the signed authentication query."""
        host = "ws-api.xfyun.cn"
        endpoint = 'wss://ws-api.xfyun.cn/v2/iat'

        # RFC 1123 timestamp of the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # The signed text is the host line, the date line and the request line.
        to_sign = "\n".join((
            "host: " + host,
            "date: " + date,
            "GET " + "/v2/iat " + "HTTP/1.1",
        ))
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            to_sign.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        auth_plain = 'api_key="{}", algorithm="{}", headers="{}", signature="{}"'.format(
            self.APIKey, "hmac-sha256", "host date request-line", signature)
        authorization = base64.b64encode(auth_plain.encode('utf-8')).decode(encoding='utf-8')

        # Query parameters, in the order authorization / date / host.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": host,
        })
        return endpoint + '?' + query
# 收到websocket消息的处理 | |||
def on_message(ws, message):
    """Merge one dictation response frame into ``whole_dict``.

    Args:
        ws: The websocket the frame arrived on (unused).
        message: JSON text of the response frame.

    A frame with a non-zero ``code`` is reported on stdout and ignored.
    Otherwise the words of the frame are concatenated and stored under the
    frame's sequence number ``sn``. When the frame carries a replace range
    ``rg`` ([first, last]), the entries for those sequence numbers are
    dropped first. Any parsing failure is reported on stdout and swallowed,
    so a malformed frame cannot break the receive loop.
    """
    global whole_dict
    try:
        payload = json.loads(message)  # parse once, not once per field
        code = payload["code"]
        sid = payload["sid"]
        if code != 0:
            errMsg = payload["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
            return

        result = payload["data"]["result"]
        sn = result["sn"]
        if "rg" in result:
            rep_start, rep_end = result["rg"][0], result["rg"][1]
            # Use a separate loop variable: reusing ``sn`` here would file
            # the new text under rep_end instead of this frame's own number,
            # so a later replace range could not find it.
            for stale_sn in range(rep_start, rep_end + 1):
                whole_dict.pop(stale_sn, None)
        whole_dict[sn] = "".join(
            word["w"] for segment in result["ws"] for word in segment["cw"]
        )
    except Exception as e:
        print("receive msg,but parse exception:", e)
# 收到websocket错误的处理 | |||
def on_error(ws, error):
    """Websocket error callback: print the error on stdout."""
    print("### error:", error)
# 收到websocket关闭的处理 | |||
def on_close(ws, a, b):
    """Websocket close callback: print a marker line; ``a`` and ``b`` are unused."""
    print("### closed ###")
# 收到websocket连接建立的处理 | |||
def on_open(ws):
    """Websocket open callback: stream the audio file on a background thread.

    The file named by ``wsParam.AudioFile`` is read with the ``wave`` module
    and sent as base64 frames. The first frame also carries the ``common``
    and ``business`` arguments; an empty read marks the end of the file and
    is sent as the last frame. The socket is always closed when the sender
    finishes, including when it fails, so the caller's ``run_forever`` loop
    cannot be left waiting on a dead sender.

    Args:
        ws: The connected websocket; must provide ``send`` and ``close``.
    """
    global wsParam
    # Snapshot the request parameters so the sender thread keeps using the
    # ones that were current when the connection opened.
    param = wsParam

    def _audio_frame(status, chunk):
        """Build the "data" section for one audio frame."""
        return {"status": status, "format": "audio/L16;rate=16000",
                "audio": str(base64.b64encode(chunk), 'utf-8'),
                "encoding": "raw"}

    def run(*args):
        frameSize = 8000  # wave frames read per websocket message
        intervel = 0.04  # pause between messages, in seconds
        status = STATUS_FIRST_FRAME
        try:
            with wave.open(param.AudioFile, "rb") as fp:
                while True:
                    buf = fp.readframes(frameSize)
                    # End of file: whatever is sent next is the last frame.
                    if not buf:
                        status = STATUS_LAST_FRAME
                    if status == STATUS_FIRST_FRAME:
                        # Only the first frame carries common/business args.
                        d = {"common": param.CommonArgs,
                             "business": param.BusinessArgs,
                             "data": _audio_frame(STATUS_FIRST_FRAME, buf)}
                        ws.send(json.dumps(d))
                        status = STATUS_CONTINUE_FRAME
                    elif status == STATUS_CONTINUE_FRAME:
                        ws.send(json.dumps({"data": _audio_frame(STATUS_CONTINUE_FRAME, buf)}))
                    elif status == STATUS_LAST_FRAME:
                        ws.send(json.dumps({"data": _audio_frame(STATUS_LAST_FRAME, buf)}))
                        # Presumably leaves time for the final result to
                        # arrive before the socket is closed.
                        time.sleep(1)
                        break
                    # Simulate the audio sampling interval.
                    time.sleep(intervel)
        except Exception as e:
            print("### error:", e)
        finally:
            ws.close()

    thread.start_new_thread(run, ())
#提供给xunfei_voice调用的函数 | |||
def xunfei_asr(APPID, APISecret, APIKey, BusinessArgsASR, AudioFile):
    """Transcribe a wav file with the streaming dictation service.

    Args:
        APPID: Application id.
        APISecret: API secret used to sign the connection URL.
        APIKey: API key sent in the authorization string.
        BusinessArgsASR: "business" arguments sent with the first frame.
        AudioFile: Path of the wav file to transcribe.

    Returns:
        The recognised text, merged in sequence-number order. The string is
        empty when nothing was recognised.

    The call blocks until the websocket is closed. Request state lives in the
    module globals ``whole_dict`` and ``wsParam``, which the callbacks read,
    so concurrent calls would share that state.
    """
    global whole_dict
    global wsParam
    whole_dict = {}
    # wsParam is read by the on_open callback.
    wsParam = Ws_Param(APPID=APPID, APISecret=APISecret,
                       APIKey=APIKey, BusinessArgs=BusinessArgsASR,
                       AudioFile=AudioFile)
    # Tracing stays off: it would dump the handshake, including the signed
    # authorization query string, to stdout.
    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    # NOTE(review): CERT_NONE disables TLS certificate verification; kept
    # as-is for compatibility, but consider enabling verification.
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    # Merge the per-sequence pieces into the final transcript.
    return "".join(whole_dict[sn] for sn in sorted(whole_dict))
@@ -0,0 +1,163 @@ | |||
# -*- coding:utf-8 -*- | |||
# | |||
# Author: njnuko | |||
# Email: njnuko@163.com | |||
# | |||
# 这个文档是基于官方的demo来改的,具体官方demo文档请参考官网 | |||
# | |||
# 语音听写流式 WebAPI 接口调用示例 接口文档(必看):https://doc.xfyun.cn/rest_api/语音听写(流式版).html | |||
# webapi 听写服务参考帖子(必看):http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra= | |||
# 语音听写流式WebAPI 服务,热词使用方式:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--个性化热词, | |||
# 设置热词 | |||
# 注意:热词只能在识别的时候会增加热词的识别权重,需要注意的是增加相应词条的识别率,但并不是绝对的,具体效果以您测试为准。 | |||
# 语音听写流式WebAPI 服务,方言试用方法:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--识别语种列表 | |||
# 可添加语种或方言,添加后会显示该方言的参数值 | |||
# 错误码链接:https://www.xfyun.cn/document/error-code (code返回错误码时必看) | |||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |||
import websocket | |||
import datetime | |||
import hashlib | |||
import base64 | |||
import hmac | |||
import json | |||
from urllib.parse import urlencode | |||
import time | |||
import ssl | |||
from wsgiref.handlers import format_date_time | |||
from datetime import datetime | |||
from time import mktime | |||
import _thread as thread | |||
import os | |||
# Frame status markers.
STATUS_FIRST_FRAME = 0  # first frame
STATUS_CONTINUE_FRAME = 1  # intermediate frame
STATUS_LAST_FRAME = 2  # final frame

# Module-level state shared between xunfei_tts() and the websocket callbacks.
# A bare ``global name`` statement at module scope is a no-op and leaves the
# name undefined, so both names are bound explicitly here.

# Path of the audio file the current request writes to.
outfile = None
# Ws_Param of the request in progress; read by the on_open callback.
wsParam = None
class Ws_Param(object):
    """Credentials and request payload for one text-to-speech call."""

    def __init__(self, APPID, APIKey, APISecret, BusinessArgs, Text):
        """Store the credentials and build the "common" and "data" sections."""
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.BusinessArgs = BusinessArgs
        self.Text = Text
        # "common" section of the request.
        self.CommonArgs = {"app_id": self.APPID}
        # The text travels base64-encoded from its UTF-8 bytes.
        # NOTE(review): the original author's comment says minority languages
        # need UTF-16LE here instead; not handled by this class.
        encoded_text = base64.b64encode(self.Text.encode('utf-8'))
        self.Data = {"status": 2, "text": str(encoded_text, "UTF8")}

    def create_url(self):
        """Return the websocket URL with the signed authentication query."""
        # NOTE(review): the signed host is ws-api.xfyun.cn although the URL
        # targets tts-api.xfyun.cn; kept exactly as in the original. Confirm
        # against the service documentation before changing it.
        signed_host = "ws-api.xfyun.cn"
        endpoint = 'wss://tts-api.xfyun.cn/v2/tts'

        # RFC 1123 timestamp of the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # The signed text is the host line, the date line and the request line.
        to_sign = "\n".join((
            "host: " + signed_host,
            "date: " + date,
            "GET " + "/v2/tts " + "HTTP/1.1",
        ))
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            to_sign.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        auth_plain = 'api_key="{}", algorithm="{}", headers="{}", signature="{}"'.format(
            self.APIKey, "hmac-sha256", "host date request-line", signature)
        authorization = base64.b64encode(auth_plain.encode('utf-8')).decode(encoding='utf-8')

        # Query parameters, in the order authorization / date / host.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": signed_host,
        })
        return endpoint + '?' + query
def on_message(ws, message):
    """Append one synthesis response frame to the output file.

    Args:
        ws: The websocket the frame arrived on; closed on the final frame
            and on an error frame.
        message: JSON text of the response frame.

    The response ``code`` is checked before the audio payload is touched, so
    an error frame is reported with its own message instead of failing on
    the missing payload. For a successful frame the base64 audio is decoded
    and appended to the module-level ``outfile``; ``status == 2`` marks the
    final frame. Any other failure is reported on stdout and swallowed.
    """
    global outfile
    try:
        message = json.loads(message)
        code = message["code"]
        sid = message["sid"]
        if code != 0:
            errMsg = message["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
            # Nothing more will be written; let run_forever return.
            ws.close()
            return

        data = message["data"]
        audio = base64.b64decode(data.get("audio") or "")
        # Write first, then close, so the final chunk is on disk before the
        # caller's run_forever loop is released.
        with open(outfile, 'ab') as f:
            f.write(audio)
        if data["status"] == 2:
            print("ws is closed")
            ws.close()
    except Exception as e:
        print("receive msg,but parse exception:", e)
# 收到websocket连接建立的处理 | |||
def on_open(ws):
    """Websocket open callback: send the synthesis request on a background thread.

    A stale output file is removed BEFORE the request is sent. Responses are
    appended to ``outfile`` by on_message, so removing the file after sending
    could delete audio that has already arrived.

    Args:
        ws: The connected websocket; must provide ``send``.
    """
    global outfile
    global wsParam

    def run(*args):
        d = {"common": wsParam.CommonArgs,
             "business": wsParam.BusinessArgs,
             "data": wsParam.Data,
             }
        d = json.dumps(d)
        # Start from an empty file; a missing file is fine.
        try:
            os.remove(outfile)
        except FileNotFoundError:
            pass
        print("------>开始发送文本数据")
        ws.send(d)

    thread.start_new_thread(run, ())
# 收到websocket错误的处理 | |||
def on_error(ws, error):
    """Websocket error callback: print the error on stdout."""
    print("### error:", error)
# 收到websocket关闭的处理 | |||
def on_close(ws, *args):
    """Websocket close callback: print a marker line.

    Extra positional arguments are accepted and ignored, so the callback
    works whether the library passes only the socket or also a close status
    code and message (the form the dictation module's on_close is written
    for).
    """
    print("### closed ###")
def xunfei_tts(APPID, APIKey, APISecret, BusinessArgsTTS, Text, OutFile):
    """Synthesize ``Text`` to ``OutFile`` and return the output path.

    The request state is published through the module globals ``outfile``
    and ``wsParam``, which the websocket callbacks read. The call blocks in
    ``run_forever`` until the socket is closed.
    """
    global outfile, wsParam
    outfile = OutFile
    wsParam = Ws_Param(APPID, APIKey, APISecret, BusinessArgsTTS, Text)

    websocket.enableTrace(False)
    app = websocket.WebSocketApp(
        wsParam.create_url(),
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )
    app.on_open = on_open
    app.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    return outfile
@@ -0,0 +1,77 @@ | |||
##################################################################### | |||
# xunfei voice service | |||
# Auth: njnuko | |||
# Email: njnuko@163.com | |||
# | |||
# 要使用本模块, 首先到 xfyun.cn 注册一个开发者账号, | |||
# 之后创建一个新应用, 然后在应用管理的语音识别或者语音合成右边可以查看APPID API Key 和 Secret Key | |||
# 然后在 config.json 中填入这三个值 | |||
##################################################################### | |||
import json | |||
import os | |||
import time | |||
from bridge.reply import Reply, ReplyType | |||
from common.log import logger | |||
from common.tmp_dir import TmpDir | |||
from config import conf | |||
from voice.voice import Voice | |||
from .xunfei_asr import xunfei_asr | |||
from .xunfei_tts import xunfei_tts | |||
from voice.audio_convert import any_to_mp3 | |||
import shutil | |||
from pydub import AudioSegment | |||
class XunfeiVoice(Voice):
    """Voice backend that delegates to xunfei_asr and xunfei_tts.

    Credentials and request arguments are read from the ``config.json`` file
    that sits next to this module.
    """

    def __init__(self):
        """Load the settings; a failure is logged and otherwise ignored."""
        # Bind every attribute up front so a failed load leaves the instance
        # in a defined state.
        self.APPID = None
        self.APIKey = None
        self.APISecret = None
        self.BusinessArgsTTS = None
        self.BusinessArgsASR = None
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            # Explicit encoding: do not depend on the platform default.
            with open(config_path, "r", encoding="utf-8") as fr:
                settings = json.load(fr)
            # The settings are deliberately not printed: they hold the API
            # key and secret.
            self.APPID = str(settings.get("APPID"))
            self.APIKey = str(settings.get("APIKey"))
            self.APISecret = str(settings.get("APISecret"))
            self.BusinessArgsTTS = settings.get("BusinessArgsTTS")
            self.BusinessArgsASR = settings.get("BusinessArgsASR")
        except Exception as e:
            logger.warning("XunfeiVoice init failed: %s, ignore " % e)

    def voiceToText(self, voice_file):
        """Recognise speech in a local audio file.

        Args:
            voice_file: Path of the audio file, passed through to xunfei_asr.

        Returns:
            A TEXT reply with the recognised text, or an ERROR reply that
            includes the failure reason.
        """
        try:
            logger.debug("[Xunfei] voice file name={}".format(voice_file))
            text = xunfei_asr(self.APPID, self.APISecret, self.APIKey, self.BusinessArgsASR, voice_file)
            logger.info("讯飞语音识别到了: {}".format(text))
            reply = Reply(ReplyType.TEXT, text)
        except Exception as e:
            logger.warning("[Xunfei] voiceToText error={}".format(e))
            reply = Reply(ReplyType.ERROR, "讯飞语音识别出错了;{0}".format(e))
        return reply

    def textToVoice(self, text):
        """Synthesize ``text`` into an mp3 file under the temp directory.

        Returns:
            A VOICE reply carrying the file path, or an ERROR reply when
            synthesis fails.
        """
        try:
            # Timestamp plus text hash avoids file name clashes under
            # multithreading.
            fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
            xunfei_tts(self.APPID, self.APIKey, self.APISecret, self.BusinessArgsTTS, text, fileName)
            logger.info("[Xunfei] textToVoice text={} voice file name={}".format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        except Exception as e:
            # Log the exception itself; the file name may not be bound yet.
            logger.error("[Xunfei] textToVoice error={}".format(e))
            reply = Reply(ReplyType.ERROR, "抱歉,讯飞语音合成失败")
        return reply