@@ -46,4 +46,8 @@ def create_voice(voice_type): | |||||
from voice.edge.edge_voice import EdgeVoice | from voice.edge.edge_voice import EdgeVoice | ||||
return EdgeVoice() | return EdgeVoice() | ||||
elif voice_type == "xunfei": | |||||
from voice.xunfei.xunfei_voice import XunfeiVoice | |||||
return XunfeiVoice() | |||||
raise RuntimeError | raise RuntimeError |
@@ -0,0 +1,7 @@ | |||||
{
"APPID":"xxx71xxx", #讯飞xfyun.cn控制台中应用的ID
"APIKey":"xxxx69058exxxxxx", #讯飞xfyun.cn控制台语音合成或者听写界面的APIKey
"APISecret":"xxxx697f0xxxxxx", #讯飞xfyun.cn控制台语音合成或者听写界面的APISecret
"BusinessArgsTTS":{"aue": "lame", "sfl": 1, "auf": "audio/L16;rate=16000", "vcn": "xiaoyan", "tte": "utf8"}, #语音合成的参数,具体可以参考xfyun.cn的文档
"BusinessArgsASR":{"domain": "iat", "language": "zh_cn", "accent": "mandarin", "vad_eos":10000, "dwa": "wpgs"} #语音听写的参数,具体可以参考xfyun.cn的文档
}
@@ -0,0 +1,209 @@ | |||||
# -*- coding:utf-8 -*- | |||||
# | |||||
# Author: njnuko | |||||
# Email: njnuko@163.com | |||||
# | |||||
# 这个文档是基于官方的demo来改的,具体官方demo文档请参考官网
# | |||||
# 语音听写流式 WebAPI 接口调用示例 接口文档(必看):https://doc.xfyun.cn/rest_api/语音听写(流式版).html | |||||
# webapi 听写服务参考帖子(必看):http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra= | |||||
# 语音听写流式WebAPI 服务,热词使用方式:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--个性化热词, | |||||
# 设置热词 | |||||
# 注意:热词只能在识别的时候会增加热词的识别权重,需要注意的是增加相应词条的识别率,但并不是绝对的,具体效果以您测试为准。 | |||||
# 语音听写流式WebAPI 服务,方言试用方法:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--识别语种列表 | |||||
# 可添加语种或方言,添加后会显示该方言的参数值 | |||||
# 错误码链接:https://www.xfyun.cn/document/error-code (code返回错误码时必看) | |||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |||||
import websocket | |||||
import datetime | |||||
import hashlib | |||||
import base64 | |||||
import hmac | |||||
import json | |||||
from urllib.parse import urlencode | |||||
import time | |||||
import ssl | |||||
from wsgiref.handlers import format_date_time | |||||
from datetime import datetime | |||||
from time import mktime | |||||
import _thread as thread | |||||
import os | |||||
import wave | |||||
# Frame status markers used when streaming audio to the dictation service.
STATUS_FIRST_FRAME = 0  # marks the first frame
STATUS_CONTINUE_FRAME = 1  # marks an intermediate frame
STATUS_LAST_FRAME = 2  # marks the last frame

# Recognised text keyed by sentence number. A dict is used because the service
# may send corrections: superseded entries are popped and the surviving
# entries are concatenated at the end. A module-level ``global`` statement
# does not create the name, so it is initialised here instead.
whole_dict = {}

# Parameters of the request in flight. This module was adapted from the
# vendor demo, whose callbacks read this module-level name.
wsParam = None
class Ws_Param(object):
    """Credentials and request settings for one streaming dictation call."""

    def __init__(self, APPID, APIKey, APISecret, BusinessArgs, AudioFile):
        """Remember the credentials, business arguments and audio file path.

        ``BusinessArgs`` is stored as given; ``CommonArgs`` is derived from
        ``APPID``.
        """
        self.APPID, self.APIKey, self.APISecret = APPID, APIKey, APISecret
        self.AudioFile = AudioFile
        self.BusinessArgs = BusinessArgs
        # Common section of the request: only the application id.
        self.CommonArgs = dict(app_id=self.APPID)

    def create_url(self):
        """Return the dictation websocket URL with signed auth parameters.

        The signature is an HMAC-SHA256 (keyed with ``APISecret``) over the
        host, the current date and the request line, base64-encoded.
        """
        host = "ws-api.xfyun.cn"
        # RFC 1123 formatted timestamp of the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # Text that gets signed: host line, date line, request line.
        signature_origin = "\n".join([
            "host: " + host,
            "date: " + date,
            "GET /v2/iat HTTP/1.1",
        ])
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            self.APIKey, "hmac-sha256", "host date request-line", signature)
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Auth parameters are appended to the endpoint as a query string.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": host,
        })
        return 'wss://ws-api.xfyun.cn/v2/iat' + '?' + query
# Handle a message received over the websocket
def on_message(ws, message):
    """Fold one dictation response into the module-level ``whole_dict``.

    ``message`` is the JSON text received from the service. A non-zero
    ``code`` is reported on stdout and otherwise ignored. For a successful
    response the words of the result are concatenated and stored under the
    result's sentence number ``sn``. When the result carries an ``rg`` pair,
    every entry whose sentence number lies in that inclusive range is dropped
    first, because the new text supersedes it.

    Any exception while handling the message is printed, not raised.
    """
    global whole_dict
    try:
        payload = json.loads(message)  # parse once instead of per field
        code = payload["code"]
        sid = payload["sid"]
        if code != 0:
            print("sid:%s call error:%s code is:%s" % (sid, payload["message"], code))
            return

        result = payload["data"]["result"]
        words = result["ws"]
        sn = result["sn"]

        if "rg" in result:
            first, last = result["rg"][0], result["rg"][1]
            # Use a separate loop variable: reusing ``sn`` here would make the
            # new text land on the last replaced number instead of its own.
            for stale_sn in range(first, last + 1):
                whole_dict.pop(stale_sn, None)

        whole_dict[sn] = "".join(cw["w"] for item in words for cw in item["cw"])
        print("sid:%s call success!,data is:%s" % (sid, json.dumps(words, ensure_ascii=False)))
    except Exception as e:
        print("receive msg,but parse exception:", e)
# Handle a websocket error
def on_error(ws, error):
    """Report a websocket error on stdout."""
    report = ("### error:", error)
    print(*report)
# Handle the websocket being closed
def on_close(ws, a=None, b=None):
    """Report on stdout that the websocket was closed.

    ``a`` and ``b`` are the two extra positional values the websocket library
    passes to the close callback; they are not used. They default to ``None``
    so the callback also works when it is invoked with the socket alone.
    """
    print("### closed ###")
# Build the "data" section of one audio frame
def _audio_frame(status, buf):
    """Return the ``data`` dict for a frame carrying ``buf`` with ``status``."""
    return {"status": status, "format": "audio/L16;rate=16000",
            "audio": str(base64.b64encode(buf), 'utf-8'), "encoding": "raw"}


# Handle the websocket connection being established
def on_open(ws):
    """Start a background thread that streams the audio file over ``ws``.

    The file named by the module-level ``wsParam.AudioFile`` is opened with
    ``wave`` and sent in chunks. The first frame also carries the common and
    business arguments; the final frame (sent once the file is exhausted) has
    status 2. The socket is closed when streaming ends, including when
    streaming fails, so that the caller's ``run_forever()`` can return.
    """
    global wsParam
    params = wsParam

    def run(*args):
        frameSize = 8000  # number of WAV frames read per message
        intervel = 0.04  # pause between messages, in seconds
        status = STATUS_FIRST_FRAME  # first, intermediate or last frame
        try:
            with wave.open(params.AudioFile, "rb") as fp:
                while True:
                    buf = fp.readframes(frameSize)
                    # End of file: whatever is sent next is the last frame.
                    if not buf:
                        status = STATUS_LAST_FRAME

                    if status == STATUS_FIRST_FRAME:
                        # Only the first frame carries the app id and the
                        # business arguments.
                        ws.send(json.dumps({"common": params.CommonArgs,
                                            "business": params.BusinessArgs,
                                            "data": _audio_frame(0, buf)}))
                        status = STATUS_CONTINUE_FRAME
                    elif status == STATUS_CONTINUE_FRAME:
                        ws.send(json.dumps({"data": _audio_frame(1, buf)}))
                    elif status == STATUS_LAST_FRAME:
                        ws.send(json.dumps({"data": _audio_frame(2, buf)}))
                        # Leave time for the last results before closing.
                        time.sleep(1)
                        break
                    # Simulate the audio sampling interval.
                    time.sleep(intervel)
        except Exception as e:
            # Top of the worker thread: report the failure instead of letting
            # the thread die silently with the socket still open.
            print("### error:", e)
        finally:
            ws.close()

    thread.start_new_thread(run, ())
# Entry point called by xunfei_voice
def xunfei_asr(APPID,APISecret,APIKey,BusinessArgsASR,AudioFile):
    """Transcribe ``AudioFile`` through the streaming dictation websocket.

    Resets the module-level result store, publishes the request parameters in
    the module-level ``wsParam`` (read by ``on_open``), runs the websocket
    until it closes, and returns the collected text joined in sentence-number
    order. Returns an empty string when nothing was recognised.

    Note that the credentials are taken in the order APPID, APISecret, APIKey.
    State is kept in module globals, so calls must not overlap.
    """
    global whole_dict
    global wsParam
    whole_dict = {}
    # wsParam is a global because the on_open callback reads it.
    wsParam = Ws_Param(APPID=APPID, APISecret=APISecret,
                       APIKey=APIKey, BusinessArgs=BusinessArgsASR,
                       AudioFile=AudioFile)
    # Tracing stays off: it would print the signed URL and every audio frame.
    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close)
    ws.on_open = on_open
    # NOTE(review): certificate verification is disabled here, as in the
    # original; confirm whether that is intended before relying on it.
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
    # Merge the stored pieces in sentence-number order for the final output.
    return "".join(whole_dict[sn] for sn in sorted(whole_dict))
@@ -0,0 +1,163 @@ | |||||
# -*- coding:utf-8 -*- | |||||
# | |||||
# Author: njnuko | |||||
# Email: njnuko@163.com | |||||
# | |||||
# 这个文档是基于官方的demo来改的,具体官方demo文档请参考官网
# | |||||
# 语音听写流式 WebAPI 接口调用示例 接口文档(必看):https://doc.xfyun.cn/rest_api/语音听写(流式版).html | |||||
# webapi 听写服务参考帖子(必看):http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38947&extra= | |||||
# 语音听写流式WebAPI 服务,热词使用方式:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--个性化热词, | |||||
# 设置热词 | |||||
# 注意:热词只能在识别的时候会增加热词的识别权重,需要注意的是增加相应词条的识别率,但并不是绝对的,具体效果以您测试为准。 | |||||
# 语音听写流式WebAPI 服务,方言试用方法:登陆开放平台https://www.xfyun.cn/后,找到控制台--我的应用---语音听写(流式)---服务管理--识别语种列表 | |||||
# 可添加语种或方言,添加后会显示该方言的参数值 | |||||
# 错误码链接:https://www.xfyun.cn/document/error-code (code返回错误码时必看) | |||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |||||
import websocket | |||||
import datetime | |||||
import hashlib | |||||
import base64 | |||||
import hmac | |||||
import json | |||||
from urllib.parse import urlencode | |||||
import time | |||||
import ssl | |||||
from wsgiref.handlers import format_date_time | |||||
from datetime import datetime | |||||
from time import mktime | |||||
import _thread as thread | |||||
import os | |||||
# Frame status markers.
STATUS_FIRST_FRAME = 0  # marks the first frame
STATUS_CONTINUE_FRAME = 1  # marks an intermediate frame
STATUS_LAST_FRAME = 2  # marks the last frame

# Path of the output audio file for the request in flight. A module-level
# ``global`` statement does not create the name, so it is initialised here.
outfile = None

# Parameters of the request in flight. This module was adapted from the
# vendor demo, whose callbacks read this module-level name.
wsParam = None
class Ws_Param(object):
    """Credentials and request payload for one text-to-speech call."""

    def __init__(self, APPID, APIKey, APISecret,BusinessArgs,Text):
        """Remember the credentials and prepare the request sections.

        ``BusinessArgs`` is stored as given; ``CommonArgs`` is derived from
        ``APPID`` and ``Data`` holds ``Text`` as base64-encoded UTF-8 with
        status 2.
        """
        self.APPID, self.APIKey, self.APISecret = APPID, APIKey, APISecret
        self.BusinessArgs = BusinessArgs
        self.Text = Text
        # Common section of the request: only the application id.
        self.CommonArgs = dict(app_id=self.APPID)
        # The original author notes that minority languages need the text
        # encoded as UTF-16LE instead of UTF-8; this code always uses UTF-8.
        encoded_text = base64.b64encode(self.Text.encode('utf-8')).decode("UTF8")
        self.Data = {"status": 2, "text": encoded_text}

    def create_url(self):
        """Return the synthesis websocket URL with signed auth parameters.

        The signature is an HMAC-SHA256 (keyed with ``APISecret``) over the
        host, the current date and the request line, base64-encoded.
        """
        # NOTE(review): the signed host is ws-api.xfyun.cn while the endpoint
        # is tts-api.xfyun.cn; kept exactly as in the original.
        host = "ws-api.xfyun.cn"
        # RFC 1123 formatted timestamp of the current time.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # Text that gets signed: host line, date line, request line.
        signature_origin = "\n".join([
            "host: " + host,
            "date: " + date,
            "GET /v2/tts HTTP/1.1",
        ])
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature = base64.b64encode(digest).decode(encoding='utf-8')

        authorization_origin = "api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"" % (
            self.APIKey, "hmac-sha256", "host date request-line", signature)
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Auth parameters are appended to the endpoint as a query string.
        query = urlencode({
            "authorization": authorization,
            "date": date,
            "host": host,
        })
        return 'wss://tts-api.xfyun.cn/v2/tts' + '?' + query
def on_message(ws, message):
    """Append one chunk of synthesised audio to the output file.

    ``message`` is the JSON text received from the service. The response code
    is checked before the audio is touched: a non-zero ``code`` is reported
    on stdout and the socket is closed, since such a response need not carry
    any ``data``. Otherwise the base64 audio is decoded and appended to the
    file named by the module-level ``outfile``; when the chunk's status is 2
    the socket is closed after the chunk has been written.

    Any exception while handling the message is printed, not raised.
    """
    # Output file path
    global outfile
    try:
        message = json.loads(message)
        code = message["code"]
        sid = message["sid"]
        if code != 0:
            errMsg = message["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
            # Nothing more will be written; close so run_forever() returns.
            ws.close()
            return

        data = message["data"]
        audio = base64.b64decode(data["audio"])
        with open(outfile, 'ab') as f:
            f.write(audio)

        if data["status"] == 2:
            print("ws is closed")
            ws.close()
    except Exception as e:
        print("receive msg,but parse exception:", e)
# Handle the websocket connection being established
def on_open(ws):
    """Clear any stale output file, then send the synthesis request.

    The file named by the module-level ``outfile`` is removed before the
    request goes out, because ``on_message`` appends to it: removing it after
    sending could delete audio that has already been written. The request is
    built from the module-level ``wsParam`` and sent from a background thread.
    """
    global outfile
    global wsParam

    # Start from an empty file; a missing file is the normal case.
    try:
        os.remove(outfile)
    except FileNotFoundError:
        pass

    def run(*args):
        d = {"common": wsParam.CommonArgs,
             "business": wsParam.BusinessArgs,
             "data": wsParam.Data,
             }
        d = json.dumps(d)
        print("------>开始发送文本数据")
        ws.send(d)

    thread.start_new_thread(run, ())
# Handle a websocket error
def on_error(ws, error):
    """Report a websocket error on stdout."""
    report = ("### error:", error)
    print(*report)
# Handle the websocket being closed
def on_close(ws, close_status_code=None, close_msg=None):
    """Report on stdout that the websocket was closed.

    The two optional arguments accept the close status and message that the
    websocket library may pass to this callback; they are not used. Accepting
    them keeps the callback from failing with a ``TypeError`` when it is
    invoked with three arguments, while a call with the socket alone still
    works.
    """
    print("### closed ###")
def xunfei_tts(APPID, APIKey, APISecret,BusinessArgsTTS, Text, OutFile):
    """Synthesise ``Text`` through the websocket and return ``OutFile``.

    Publishes the output path and request parameters in the module-level
    ``outfile`` and ``wsParam`` (read by the callbacks), runs the websocket
    until it closes, and returns the output path. The path is returned
    whether or not any audio was written. State is kept in module globals,
    so calls must not overlap.
    """
    global outfile, wsParam
    outfile = OutFile
    wsParam = Ws_Param(APPID, APIKey, APISecret, BusinessArgsTTS, Text)
    websocket.enableTrace(False)
    app = websocket.WebSocketApp(
        wsParam.create_url(),
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )
    app.on_open = on_open
    # Certificate verification is disabled for this connection.
    app.run_forever(sslopt=dict(cert_reqs=ssl.CERT_NONE))
    return outfile
@@ -0,0 +1,77 @@ | |||||
##################################################################### | |||||
# xunfei voice service | |||||
# Auth: njnuko | |||||
# Email: njnuko@163.com | |||||
# | |||||
# 要使用本模块, 首先到 xfyun.cn 注册一个开发者账号, | |||||
# 之后创建一个新应用, 然后在应用管理的语音识别或者语音合成右边可以查看APPID API Key 和 Secret Key
# 然后在 config.json 中填入这三个值 | |||||
##################################################################### | |||||
import json | |||||
import os | |||||
import time | |||||
from bridge.reply import Reply, ReplyType | |||||
from common.log import logger | |||||
from common.tmp_dir import TmpDir | |||||
from config import conf | |||||
from voice.voice import Voice | |||||
from .xunfei_asr import xunfei_asr | |||||
from .xunfei_tts import xunfei_tts | |||||
from voice.audio_convert import any_to_mp3 | |||||
import shutil | |||||
from pydub import AudioSegment | |||||
class XunfeiVoice(Voice):
    """Voice backend that delegates recognition and synthesis to xunfei.

    Credentials and business arguments are read from ``config.json`` next to
    this module.
    """

    def __init__(self):
        """Load credentials and business arguments from ``config.json``.

        A failure to load is logged and ignored, as before; the attributes
        then keep their ``None`` defaults, so later calls fail inside their
        own error handling instead of on a missing attribute.
        """
        self.APPID = None
        self.APIKey = None
        self.APISecret = None
        self.BusinessArgsTTS = None
        self.BusinessArgsASR = None
        try:
            curdir = os.path.dirname(__file__)
            config_path = os.path.join(curdir, "config.json")
            # Local name chosen so the imported ``conf`` is not shadowed.
            with open(config_path, "r", encoding="utf-8") as fr:
                settings = json.load(fr)
            # The loaded settings are deliberately not printed: they contain
            # the API key and secret.
            self.APPID = str(settings.get("APPID"))
            self.APIKey = str(settings.get("APIKey"))
            self.APISecret = str(settings.get("APISecret"))
            self.BusinessArgsTTS = settings.get("BusinessArgsTTS")
            self.BusinessArgsASR = settings.get("BusinessArgsASR")
        except Exception as e:
            logger.warning("XunfeiVoice init failed: %s, ignore " % e)

    def voiceToText(self, voice_file):
        """Recognise the speech in a local audio file.

        Returns a TEXT reply carrying the recognised text, or an ERROR reply
        that includes the failure reason.
        """
        try:
            logger.debug("[Xunfei] voice file name={}".format(voice_file))
            # xunfei_asr takes the secret before the key.
            text = xunfei_asr(self.APPID,self.APISecret,self.APIKey,self.BusinessArgsASR,voice_file)
            logger.info("讯飞语音识别到了: {}".format(text))
            reply = Reply(ReplyType.TEXT, text)
        except Exception as e:
            logger.warning("[Xunfei] voiceToText error: %s" % e)
            reply = Reply(ReplyType.ERROR, "讯飞语音识别出错了;{0}".format(e))
        return reply

    def textToVoice(self, text):
        """Synthesise ``text`` into an audio file.

        Returns a VOICE reply carrying the path of the generated file, or an
        ERROR reply when synthesis fails.
        """
        try:
            # Avoid the same filename under multithreading
            # NOTE(review): the ".mp3" suffix presumably relies on the TTS
            # business arguments requesting MP3 output — confirm.
            fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
            xunfei_tts(self.APPID,self.APIKey,self.APISecret,self.BusinessArgsTTS,text,fileName)
            logger.info("[Xunfei] textToVoice text={} voice file name={}".format(text, fileName))
            reply = Reply(ReplyType.VOICE, fileName)
        except Exception as e:
            # Log the exception itself: fileName may not even be bound here.
            logger.error("[Xunfei] textToVoice error={}".format(e))
            reply = Reply(ReplyType.ERROR, "抱歉,讯飞语音合成失败")
        return reply