From acbd506568d456acf612c88f7524a0bf0a6df434 Mon Sep 17 00:00:00 2001 From: uezhenxiang2023 Date: Sat, 19 Aug 2023 11:20:47 +0800 Subject: [PATCH 1/2] add ElevenLabs TTS to voice factory --- config.py | 5 ++- voice/elevent/elevent_voice.py | 79 ++++++++++++++++++++++++++++++++++ voice/factory.py | 4 ++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 voice/elevent/elevent_voice.py diff --git a/config.py b/config.py index fb5b407..a4bf4ef 100644 --- a/config.py +++ b/config.py @@ -61,7 +61,7 @@ available_setting = { "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key "always_reply_voice": False, # 是否一直使用语音回复 "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure - "text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline),azure + "text_to_voice": "baidu", # 语音合成引擎,支持baidu,google,pytts(offline),azure,elevenlabs # baidu 语音api配置, 使用百度语音识别和语音合成时需要 "baidu_app_id": "", "baidu_api_key": "", @@ -71,6 +71,9 @@ available_setting = { # azure 语音api配置, 使用azure语音识别和语音合成时需要 "azure_voice_api_key": "", "azure_voice_region": "japaneast", + # elevenlabs 语音api配置 + "xi_api_key": "", #获取api key的方法可以参考https://docs.elevenlabs.io/api-reference/quick-start/authentication + "xi_voice_id": "", #ElevenLabs提供了9种英式、美式等英语发音id,分别是“Adam/Antoni/Arnold/Bella/Domi/Elli/Josh/Rachel/Sam” # 服务时间限制,目前支持itchat "chat_time_module": False, # 是否开启服务时间限制 "chat_start_time": "00:00", # 服务开始时间 diff --git a/voice/elevent/elevent_voice.py b/voice/elevent/elevent_voice.py new file mode 100644 index 0000000..40a2aec --- /dev/null +++ b/voice/elevent/elevent_voice.py @@ -0,0 +1,79 @@ +""" +eleventLabs voice service + +["voice_id":"pNInz6obpgDQGcFmaJgB","name":"Adam"] +["voice_id":"ErXwobaYiN019PkySvjV","name":"Antoni"] +["voice_id":"VR6AewLTigWG4xSOukaG","name":"Arnold"] +["voice_id":"EXAVITQu4vr4xnSDxMaL","name":"Bella"] +["voice_id":"AZnzlk1XvdvUeBnXmlld","name":"Domi"] +["voice_id":"MF3mGyEYCl7XYWbV9V6O","name":"Elli"] +["voice_id":"TxGEqnHWrfWFTfGW9XjX","name":"Josh"] 
+["voice_id":"21m00Tcm4TlvDq8ikWAM","name":"Rachel"] +["voice_id":"yoZ06aMxZJJ28mfd3POQ","name":"Sam"] + +""" + +import time +import requests + +from elevenlabs import generate + +from bridge.reply import Reply, ReplyType +from common.log import logger +from common.tmp_dir import TmpDir +from voice.voice import Voice +from config import conf + +XI_API_KEY = conf().get("xi_api_key") +name = conf().get("xi_voice_id") + +if name == "Adam": + voice_id = "pNInz6obpgDQGcFmaJgB" +elif name == "Antoni": + voice_id = "ErXwobaYiN019PkySvjV" +elif name == "Arnold": + voice_id = "VR6AewLTigWG4xSOukaG" +elif name == "Bella": + voice_id = "EXAVITQu4vr4xnSDxMaL" +elif name == "Domi": + voice_id = "AZnzlk1XvdvUeBnXmlld" +elif name == "Elli": + voice_id = "MF3mGyEYCl7XYWbV9V6O" +elif name == "Josh": + voice_id = "TxGEqnHWrfWFTfGW9XjX" +elif name == "Rachel": + voice_id = "21m00Tcm4TlvDq8ikWAM" +elif name == "Sam": + voice_id = "yoZ06aMxZJJ28mfd3POQ" + + +class ElevenLabsVoice(Voice): + + def __init__(self): + pass + + def voiceToText(self, voice_file): + pass + + def textToVoice(self, text): + url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" + headers = { + "Accept": "audio/mpeg", + "Content-Type": "application/json", + "xi-api-key": XI_API_KEY + } + data = { + "text": text, + "model_id": "eleven_monolingual_v1", + "voice_settings": { + "stability": 0, + "similarity_boost": 0 + } + } + response = requests.post(url, json=data, headers=headers) + audio = response.content + fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3" + with open(fileName, "wb") as f: + f.write(audio) + logger.info("[ElevenLabs] textToVoice text={} voice file name={}".format(text, fileName)) + return Reply(ReplyType.VOICE, fileName) \ No newline at end of file diff --git a/voice/factory.py b/voice/factory.py index 45fe0d1..d591a4f 100644 --- a/voice/factory.py +++ b/voice/factory.py @@ -29,4 +29,8 @@ def create_voice(voice_type): from 
voice.azure.azure_voice import AzureVoice return AzureVoice() + elif voice_type == "elevenlabs": + from voice.elevent.elevent_voice import ElevenLabsVoice + + return ElevenLabsVoice() raise RuntimeError From db4998a56ba1e3ed7e0daa657b93e1a5d1b9c259 Mon Sep 17 00:00:00 2001 From: uezhenxiang2023 Date: Sun, 20 Aug 2023 10:58:26 +0800 Subject: [PATCH 2/2] replace requests with elevenlabs for audio generation --- voice/elevent/elevent_voice.py | 60 ++++------------------------------ 1 file changed, 7 insertions(+), 53 deletions(-) diff --git a/voice/elevent/elevent_voice.py b/voice/elevent/elevent_voice.py index 40a2aec..15936ab 100644 --- a/voice/elevent/elevent_voice.py +++ b/voice/elevent/elevent_voice.py @@ -1,22 +1,6 @@ -""" -eleventLabs voice service - -["voice_id":"pNInz6obpgDQGcFmaJgB","name":"Adam"] -["voice_id":"ErXwobaYiN019PkySvjV","name":"Antoni"] -["voice_id":"VR6AewLTigWG4xSOukaG","name":"Arnold"] -["voice_id":"EXAVITQu4vr4xnSDxMaL","name":"Bella"] -["voice_id":"AZnzlk1XvdvUeBnXmlld","name":"Domi"] -["voice_id":"MF3mGyEYCl7XYWbV9V6O","name":"Elli"] -["voice_id":"TxGEqnHWrfWFTfGW9XjX","name":"Josh"] -["voice_id":"21m00Tcm4TlvDq8ikWAM","name":"Rachel"] -["voice_id":"yoZ06aMxZJJ28mfd3POQ","name":"Sam"] - -""" - import time -import requests -from elevenlabs import generate +from elevenlabs import set_api_key,generate from bridge.reply import Reply, ReplyType from common.log import logger @@ -25,28 +9,9 @@ from voice.voice import Voice from config import conf XI_API_KEY = conf().get("xi_api_key") +set_api_key(XI_API_KEY) name = conf().get("xi_voice_id") -if name == "Adam": - voice_id = "pNInz6obpgDQGcFmaJgB" -elif name == "Antoni": - voice_id = "ErXwobaYiN019PkySvjV" -elif name == "Arnold": - voice_id = "VR6AewLTigWG4xSOukaG" -elif name == "Bella": - voice_id = "EXAVITQu4vr4xnSDxMaL" -elif name == "Domi": - voice_id = "AZnzlk1XvdvUeBnXmlld" -elif name == "Elli": - voice_id = "MF3mGyEYCl7XYWbV9V6O" -elif name == "Josh": - voice_id = "TxGEqnHWrfWFTfGW9XjX" 
-elif name == "Rachel": - voice_id = "21m00Tcm4TlvDq8ikWAM" -elif name == "Sam": - voice_id = "yoZ06aMxZJJ28mfd3POQ" - - class ElevenLabsVoice(Voice): def __init__(self): @@ -56,22 +21,11 @@ class ElevenLabsVoice(Voice): pass def textToVoice(self, text): - url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" - headers = { - "Accept": "audio/mpeg", - "Content-Type": "application/json", - "xi-api-key": XI_API_KEY - } - data = { - "text": text, - "model_id": "eleven_monolingual_v1", - "voice_settings": { - "stability": 0, - "similarity_boost": 0 - } - } - response = requests.post(url, json=data, headers=headers) - audio = response.content + audio = generate( + text=text, + voice=name, + model='eleven_multilingual_v1' + ) fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3" with open(fileName, "wb") as f: f.write(audio)