@@ -95,7 +95,7 @@ available_setting = { | |||
"group_speech_recognition": False, # 是否开启群组语音识别 | |||
"voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key | |||
"always_reply_voice": False, # 是否一直使用语音回复 | |||
"voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure | |||
"voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,ali,azure | |||
"text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),ali,azure,elevenlabs,edge(online) | |||
"text_to_voice_model": "tts-1", | |||
"tts_voice_id": "alloy", | |||
@@ -8,6 +8,7 @@ Description: | |||
""" | |||
import http.client | |||
import json | |||
import time | |||
import requests | |||
@@ -61,6 +62,69 @@ def text_to_speech_aliyun(url, text, appkey, token): | |||
return output_file | |||
def speech_to_text_aliyun(url, audioContent, appkey, token):
    """
    Recognize the speech contained in PCM audio data with the Aliyun speech
    recognition RESTful service.

    The request asks for 16 kHz PCM input with punctuation prediction and
    inverse text normalization enabled; voice detection is left disabled.

    Parameters:
    - url (str): endpoint URL of the Aliyun speech recognition service. It is
      sent as the request target; the HTTPS connection itself is always opened
      to the fixed host below, not to the host contained in this URL.
    - audioContent (bytes): PCM audio data to recognize.
    - appkey (str): your Aliyun appkey.
    - token (str): authentication token for the Aliyun API.

    Returns:
    - str: the recognized text on success, otherwise None (empty audio,
      transport error, non-JSON or malformed response, non-success status
      code, or an empty recognition result).
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urlencode

    # Status value the service reports for a successful recognition.
    success_status = 20000000
    # NOTE(review): the gateway host is fixed here regardless of `url`.
    host = 'nls-gateway-cn-shanghai.aliyuncs.com'

    if not audioContent:
        # Nothing to recognize; avoid calling len() on None or posting an empty body.
        logger.error("语音识别失败,音频数据为空")
        return None

    # Optional recognition switches; only enabled ones are added to the query.
    optional_flags = {
        'enable_punctuation_prediction': True,
        'enable_inverse_text_normalization': True,
        'enable_voice_detection': False,
    }
    query = [('appkey', appkey), ('format', 'pcm'), ('sample_rate', 16000)]
    query.extend((name, 'true') for name, enabled in optional_flags.items() if enabled)
    # urlencode escapes any reserved characters in the parameter values.
    request_url = url + '?' + urlencode(query)

    http_headers = {
        'X-NLS-Token': token,
        'Content-type': 'application/octet-stream',
        'Content-Length': str(len(audioContent)),
    }

    conn = http.client.HTTPSConnection(host)
    try:
        conn.request(method='POST', url=request_url, body=audioContent, headers=http_headers)
        raw_body = conn.getresponse().read()
    except (OSError, http.client.HTTPException) as e:
        # Network/protocol failures are reported as a failed recognition.
        logger.error(f"语音识别失败,请求异常: {e}")
        return None
    finally:
        # Always release the connection, including on errors.
        conn.close()

    try:
        payload = json.loads(raw_body)
    except ValueError:
        logger.error(f"语音识别失败,收到非JSON格式的数据: {raw_body}")
        return None
    if not isinstance(payload, dict):
        # Valid JSON but not an object, so there is no status/result to read.
        logger.error(f"语音识别失败,收到非JSON格式的数据: {raw_body}")
        return None

    status = payload.get('status')
    if status != success_status:
        logger.error(f"语音识别失败,状态码: {status}")
        return None

    result = payload.get('result')
    if not result:
        # Successful status but nothing was recognized.
        return None
    logger.info(f"阿里云语音识别到了:{result}")
    return result
class AliyunTokenGenerator: | |||
""" | |||
@@ -15,9 +15,9 @@ import time | |||
from bridge.reply import Reply, ReplyType | |||
from common.log import logger | |||
from voice.audio_convert import get_pcm_from_wav | |||
from voice.voice import Voice | |||
from voice.ali.ali_api import AliyunTokenGenerator | |||
from voice.ali.ali_api import text_to_speech_aliyun | |||
from voice.ali.ali_api import AliyunTokenGenerator, speech_to_text_aliyun, text_to_speech_aliyun | |||
from config import conf | |||
@@ -34,7 +34,8 @@ class AliVoice(Voice): | |||
self.token = None | |||
self.token_expire_time = 0 | |||
# 默认复用阿里云千问的 access_key 和 access_secret | |||
self.api_url = config.get("api_url") | |||
self.api_url_voice_to_text = config.get("api_url_voice_to_text") | |||
self.api_url_text_to_voice = config.get("api_url_text_to_voice") | |||
self.app_key = config.get("app_key") | |||
self.access_key_id = conf().get("qwen_access_key_id") or config.get("access_key_id") | |||
self.access_key_secret = conf().get("qwen_access_key_secret") or config.get("access_key_secret") | |||
@@ -53,7 +54,7 @@ class AliVoice(Voice): | |||
r'äöüÄÖÜáéíóúÁÉÍÓÚàèìòùÀÈÌÒÙâêîôûÂÊÎÔÛçÇñÑ,。!?,.]', '', text) | |||
# 提取有效的token | |||
token_id = self.get_valid_token() | |||
fileName = text_to_speech_aliyun(self.api_url, text, self.app_key, token_id) | |||
fileName = text_to_speech_aliyun(self.api_url_text_to_voice, text, self.app_key, token_id) | |||
if fileName: | |||
logger.info("[Ali] textToVoice text={} voice file name={}".format(text, fileName)) | |||
reply = Reply(ReplyType.VOICE, fileName) | |||
@@ -61,6 +62,25 @@ class AliVoice(Voice): | |||
reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败") | |||
return reply | |||
def voiceToText(self, voice_file):
    """
    Convert a voice file to text.

    :param voice_file: the voice file to convert; it is handed to
        get_pcm_from_wav to obtain the audio data sent for recognition.
    :return: a Reply object holding the recognized text (ReplyType.TEXT), or
        an error message (ReplyType.ERROR) when no text was recognized.
    """
    # Obtain a valid token before touching the audio file.
    token_id = self.get_valid_token()
    logger.debug("[Ali] voice file name={}".format(voice_file))

    pcm_data = get_pcm_from_wav(voice_file)
    recognized = speech_to_text_aliyun(
        self.api_url_voice_to_text, pcm_data, self.app_key, token_id
    )

    # Guard clause: an empty/None result means recognition failed.
    if not recognized:
        return Reply(ReplyType.ERROR, "抱歉,语音识别失败")

    logger.info("[Ali] VoicetoText = {}".format(recognized))
    return Reply(ReplyType.TEXT, recognized)
def get_valid_token(self): | |||
""" | |||
获取有效的阿里云token。 | |||
@@ -1,5 +1,6 @@ | |||
{ | |||
"api_url": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", | |||
"api_url_text_to_voice": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", | |||
"api_url_voice_to_text": "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr", | |||
"app_key": "", | |||
"access_key_id": "", | |||
"access_key_secret": "" |