From acbd506568d456acf612c88f7524a0bf0a6df434 Mon Sep 17 00:00:00 2001
From: uezhenxiang2023 <uezhenxiang@gmail.com>
Date: Sat, 19 Aug 2023 11:20:47 +0800
Subject: [PATCH 1/2] add ElevenLabs TTS to voice factory

---
 config.py                      |  5 ++-
 voice/elevent/elevent_voice.py | 79 ++++++++++++++++++++++++++++++++++
 voice/factory.py               |  4 ++
 3 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 voice/elevent/elevent_voice.py

diff --git a/config.py b/config.py
index fb5b407..a4bf4ef 100644
--- a/config.py
+++ b/config.py
@@ -61,7 +61,7 @@ available_setting = {
     "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
     "always_reply_voice": False,  # 是否一直使用语音回复
     "voice_to_text": "openai",  # 语音识别引擎，支持openai,baidu,google,azure
-    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline),azure
+    "text_to_voice": "baidu",  # 语音合成引擎，支持baidu,google,pytts(offline),azure,elevenlabs
     # baidu 语音api配置， 使用百度语音识别和语音合成时需要
     "baidu_app_id": "",
     "baidu_api_key": "",
@@ -71,6 +71,9 @@ available_setting = {
     # azure 语音api配置， 使用azure语音识别和语音合成时需要
     "azure_voice_api_key": "",
     "azure_voice_region": "japaneast",
+    # elevenlabs 语音api配置
+    "xi_api_key": "",    #获取api key的方法可以参考https://docs.elevenlabs.io/api-reference/quick-start/authentication
+    "xi_voice_id": "",   #此处填写发音人名称；ElevenLabs提供了9种英式、美式等英语发音，分别是“Adam/Antoni/Arnold/Bella/Domi/Elli/Josh/Rachel/Sam”
     # 服务时间限制，目前支持itchat
     "chat_time_module": False,  # 是否开启服务时间限制
     "chat_start_time": "00:00",  # 服务开始时间
diff --git a/voice/elevent/elevent_voice.py b/voice/elevent/elevent_voice.py
new file mode 100644
index 0000000..40a2aec
--- /dev/null
+++ b/voice/elevent/elevent_voice.py
@@ -0,0 +1,79 @@
+"""
+eleventLabs voice service
+
+["voice_id":"pNInz6obpgDQGcFmaJgB","name":"Adam"]
+["voice_id":"ErXwobaYiN019PkySvjV","name":"Antoni"]
+["voice_id":"VR6AewLTigWG4xSOukaG","name":"Arnold"]
+["voice_id":"EXAVITQu4vr4xnSDxMaL","name":"Bella"]
+["voice_id":"AZnzlk1XvdvUeBnXmlld","name":"Domi"]
+["voice_id":"MF3mGyEYCl7XYWbV9V6O","name":"Elli"]
+["voice_id":"TxGEqnHWrfWFTfGW9XjX","name":"Josh"]
+["voice_id":"21m00Tcm4TlvDq8ikWAM","name":"Rachel"]
+["voice_id":"yoZ06aMxZJJ28mfd3POQ","name":"Sam"]
+
+"""
+
+import time
+import requests
+
+from elevenlabs import generate
+
+from bridge.reply import Reply, ReplyType
+from common.log import logger
+from common.tmp_dir import TmpDir
+from voice.voice import Voice
+from config import conf
+
+XI_API_KEY = conf().get("xi_api_key")
+name = conf().get("xi_voice_id")
+
+if name == "Adam":
+    voice_id = "pNInz6obpgDQGcFmaJgB"
+elif name == "Antoni":
+    voice_id = "ErXwobaYiN019PkySvjV"
+elif name == "Arnold":
+    voice_id = "VR6AewLTigWG4xSOukaG"
+elif name == "Bella":
+    voice_id = "EXAVITQu4vr4xnSDxMaL"
+elif name == "Domi":
+    voice_id = "AZnzlk1XvdvUeBnXmlld"
+elif name == "Elli":
+    voice_id = "MF3mGyEYCl7XYWbV9V6O"
+elif name == "Josh":
+    voice_id = "TxGEqnHWrfWFTfGW9XjX"
+elif name == "Rachel":
+    voice_id = "21m00Tcm4TlvDq8ikWAM"
+elif name == "Sam":
+    voice_id = "yoZ06aMxZJJ28mfd3POQ"
+
+
+class ElevenLabsVoice(Voice):
+
+    def __init__(self):
+        pass
+
+    def voiceToText(self, voice_file):
+        pass
+
+    def textToVoice(self, text):
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
+        headers = {
+            "Accept": "audio/mpeg",
+            "Content-Type": "application/json",
+            "xi-api-key": XI_API_KEY
+        }
+        data = {
+            "text": text,
+            "model_id": "eleven_monolingual_v1",
+            "voice_settings": {
+                "stability": 0,
+                "similarity_boost": 0
+            }
+        }
+        response = requests.post(url, json=data, headers=headers)
+        audio = response.content
+        fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
+        with open(fileName, "wb") as f:
+            f.write(audio)
+        logger.info("[ElevenLabs] textToVoice text={} voice file name={}".format(text, fileName))
+        return Reply(ReplyType.VOICE, fileName)
\ No newline at end of file
diff --git a/voice/factory.py b/voice/factory.py
index 45fe0d1..d591a4f 100644
--- a/voice/factory.py
+++ b/voice/factory.py
@@ -29,4 +29,8 @@ def create_voice(voice_type):
         from voice.azure.azure_voice import AzureVoice
 
         return AzureVoice()
+    elif voice_type == "elevenlabs":
+        from voice.elevent.elevent_voice import ElevenLabsVoice
+
+        return ElevenLabsVoice()
     raise RuntimeError

From db4998a56ba1e3ed7e0daa657b93e1a5d1b9c259 Mon Sep 17 00:00:00 2001
From: uezhenxiang2023 <uezhenxiang@gmail.com>
Date: Sun, 20 Aug 2023 10:58:26 +0800
Subject: [PATCH 2/2] replace requests with elevenlabs for audio generation

---
 voice/elevent/elevent_voice.py | 60 ++++------------------------------
 1 file changed, 7 insertions(+), 53 deletions(-)

diff --git a/voice/elevent/elevent_voice.py b/voice/elevent/elevent_voice.py
index 40a2aec..15936ab 100644
--- a/voice/elevent/elevent_voice.py
+++ b/voice/elevent/elevent_voice.py
@@ -1,22 +1,6 @@
-"""
-eleventLabs voice service
-
-["voice_id":"pNInz6obpgDQGcFmaJgB","name":"Adam"]
-["voice_id":"ErXwobaYiN019PkySvjV","name":"Antoni"]
-["voice_id":"VR6AewLTigWG4xSOukaG","name":"Arnold"]
-["voice_id":"EXAVITQu4vr4xnSDxMaL","name":"Bella"]
-["voice_id":"AZnzlk1XvdvUeBnXmlld","name":"Domi"]
-["voice_id":"MF3mGyEYCl7XYWbV9V6O","name":"Elli"]
-["voice_id":"TxGEqnHWrfWFTfGW9XjX","name":"Josh"]
-["voice_id":"21m00Tcm4TlvDq8ikWAM","name":"Rachel"]
-["voice_id":"yoZ06aMxZJJ28mfd3POQ","name":"Sam"]
-
-"""
-
 import time
-import requests
 
-from elevenlabs import generate
+from elevenlabs import set_api_key,generate
 
 from bridge.reply import Reply, ReplyType
 from common.log import logger
@@ -25,28 +9,9 @@ from voice.voice import Voice
 from config import conf
 
 XI_API_KEY = conf().get("xi_api_key")
+set_api_key(XI_API_KEY)
 name = conf().get("xi_voice_id")
 
-if name == "Adam":
-    voice_id = "pNInz6obpgDQGcFmaJgB"
-elif name == "Antoni":
-    voice_id = "ErXwobaYiN019PkySvjV"
-elif name == "Arnold":
-    voice_id = "VR6AewLTigWG4xSOukaG"
-elif name == "Bella":
-    voice_id = "EXAVITQu4vr4xnSDxMaL"
-elif name == "Domi":
-    voice_id = "AZnzlk1XvdvUeBnXmlld"
-elif name == "Elli":
-    voice_id = "MF3mGyEYCl7XYWbV9V6O"
-elif name == "Josh":
-    voice_id = "TxGEqnHWrfWFTfGW9XjX"
-elif name == "Rachel":
-    voice_id = "21m00Tcm4TlvDq8ikWAM"
-elif name == "Sam":
-    voice_id = "yoZ06aMxZJJ28mfd3POQ"
-
-
 class ElevenLabsVoice(Voice):
 
     def __init__(self):
@@ -56,22 +21,11 @@ class ElevenLabsVoice(Voice):
         pass
 
     def textToVoice(self, text):
-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
-        headers = {
-            "Accept": "audio/mpeg",
-            "Content-Type": "application/json",
-            "xi-api-key": XI_API_KEY
-        }
-        data = {
-            "text": text,
-            "model_id": "eleven_monolingual_v1",
-            "voice_settings": {
-                "stability": 0,
-                "similarity_boost": 0
-            }
-        }
-        response = requests.post(url, json=data, headers=headers)
-        audio = response.content
+        audio = generate(
+            text=text,
+            voice=name,
+            model='eleven_multilingual_v1'
+        )
         fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3"
         with open(fileName, "wb") as f:
             f.write(audio)