diff --git a/config.py b/config.py index 26f073d..c15b70f 100644 --- a/config.py +++ b/config.py @@ -95,7 +95,7 @@ available_setting = { "group_speech_recognition": False, # 是否开启群组语音识别 "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key "always_reply_voice": False, # 是否一直使用语音回复 - "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure + "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,ali,azure "text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),ali,azure,elevenlabs,edge(online) "text_to_voice_model": "tts-1", "tts_voice_id": "alloy", diff --git a/voice/ali/ali_api.py b/voice/ali/ali_api.py index cac0c8c..def5c7a 100644 --- a/voice/ali/ali_api.py +++ b/voice/ali/ali_api.py @@ -8,6 +8,7 @@ Description: """ +import http.client import json import time import requests @@ -61,6 +62,69 @@ def text_to_speech_aliyun(url, text, appkey, token): return output_file +def speech_to_text_aliyun(url, audioContent, appkey, token): + """ + 使用阿里云的语音识别服务识别音频文件中的语音。 + + 参数: + - url (str): 阿里云语音识别服务的端点URL。 + - audioContent (byte): pcm音频数据。 + - appkey (str): 您的阿里云appkey。 + - token (str): 阿里云API的认证令牌。 + + 返回值: + - str: 成功时输出识别到的文本,否则为None。 + """ + format = 'pcm' + sample_rate = 16000 + enablePunctuationPrediction = True + enableInverseTextNormalization = True + enableVoiceDetection = False + + # 设置RESTful请求参数 + request = url + '?appkey=' + appkey + request = request + '&format=' + format + request = request + '&sample_rate=' + str(sample_rate) + + if enablePunctuationPrediction : + request = request + '&enable_punctuation_prediction=' + 'true' + + if enableInverseTextNormalization : + request = request + '&enable_inverse_text_normalization=' + 'true' + + if enableVoiceDetection : + request = request + '&enable_voice_detection=' + 'true' + + host = 'nls-gateway-cn-shanghai.aliyuncs.com' + + # 设置HTTPS请求头部 + httpHeaders = { + 'X-NLS-Token': token, + 'Content-type': 'application/octet-stream', + 'Content-Length': len(audioContent) + } + + conn = http.client.HTTPSConnection(host) + conn.request(method='POST', url=request, body=audioContent, headers=httpHeaders) + + response = conn.getresponse() + body = response.read() + try: + body = json.loads(body) + status = body['status'] + if status == 20000000 : + result = body['result'] + if result : + logger.info(f"阿里云语音识别到了:{result}") + conn.close() + return result + else : + logger.error(f"语音识别失败,状态码: {status}") + except ValueError: + logger.error(f"语音识别失败,收到非JSON格式的数据: {body}") + conn.close() + return None + class AliyunTokenGenerator: """ diff --git a/voice/ali/ali_voice.py b/voice/ali/ali_voice.py index 79a9aaa..43ea0b4 100644 --- a/voice/ali/ali_voice.py +++ b/voice/ali/ali_voice.py @@ -15,9 +15,9 @@ import time from bridge.reply import Reply, ReplyType from common.log import logger +from voice.audio_convert import get_pcm_from_wav from voice.voice import Voice -from voice.ali.ali_api import AliyunTokenGenerator -from voice.ali.ali_api import text_to_speech_aliyun +from voice.ali.ali_api import AliyunTokenGenerator, speech_to_text_aliyun, text_to_speech_aliyun from config import conf @@ -34,7 +34,8 @@ class AliVoice(Voice): self.token = None self.token_expire_time = 0 # 默认复用阿里云千问的 access_key 和 access_secret - self.api_url = config.get("api_url") + self.api_url_voice_to_text = config.get("api_url_voice_to_text") + self.api_url_text_to_voice = config.get("api_url_text_to_voice") self.app_key = config.get("app_key") self.access_key_id = conf().get("qwen_access_key_id") or config.get("access_key_id") self.access_key_secret = conf().get("qwen_access_key_secret") or config.get("access_key_secret") @@ -53,7 +54,7 @@ class AliVoice(Voice): r'äöüÄÖÜáéíóúÁÉÍÓÚàèìòùÀÈÌÒÙâêîôûÂÊÎÔÛçÇñÑ,。!?,.]', '', text) # 提取有效的token token_id = self.get_valid_token() - fileName = text_to_speech_aliyun(self.api_url, text, self.app_key, token_id) + fileName = text_to_speech_aliyun(self.api_url_text_to_voice, text, self.app_key, token_id) if fileName: logger.info("[Ali] textToVoice text={} voice file name={}".format(text, fileName)) reply = Reply(ReplyType.VOICE, fileName) @@ -61,6 +62,25 @@ class AliVoice(Voice): reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败") return reply + def voiceToText(self, voice_file): + """ + 将语音文件转换为文本。 + + :param voice_file: 要转换的语音文件。 + :return: 返回一个Reply对象,其中包含转换得到的文本或错误信息。 + """ + # 提取有效的token + token_id = self.get_valid_token() + logger.debug("[Ali] voice file name={}".format(voice_file)) + pcm = get_pcm_from_wav(voice_file) + text = speech_to_text_aliyun(self.api_url_voice_to_text, pcm, self.app_key, token_id) + if text: + logger.info("[Ali] VoicetoText = {}".format(text)) + reply = Reply(ReplyType.TEXT, text) + else: + reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败") + return reply + def get_valid_token(self): """ 获取有效的阿里云token。 diff --git a/voice/ali/config.json.template b/voice/ali/config.json.template index 6a4aaa9..563c57f 100644 --- a/voice/ali/config.json.template +++ b/voice/ali/config.json.template @@ -1,5 +1,6 @@ { - "api_url": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", + "api_url_text_to_voice": "https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/tts", + "api_url_voice_to_text": "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/asr", "app_key": "", "access_key_id": "", "access_key_secret": ""