diff --git a/README.md b/README.md index 46541d0..dadf801 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ Demo made by [Visionn](https://www.wangpc.cc/) SaaS服务、私有化部署、稳定托管接入 等多种模式。 > > 目前已在私域运营、智能客服、企业效率助手等场景积累了丰富的 AI 解决方案, 在电商、文教、健康、新消费等各行业沉淀了 AI 落地的最佳实践,致力于打造助力中小企业拥抱 AI 的一站式平台。 - 企业服务和商用咨询可联系产品顾问: diff --git a/bot/gemini/google_gemini_bot.py b/bot/gemini/google_gemini_bot.py index 1a49d60..e63f1cb 100644 --- a/bot/gemini/google_gemini_bot.py +++ b/bot/gemini/google_gemini_bot.py @@ -44,6 +44,7 @@ class GoogleGeminiBot(Bot): except Exception as e: logger.error("[Gemini] fetch reply error, may contain unsafe content") logger.error(e) + return Reply(ReplyType.ERROR, "invoke [Gemini] api failed!") def _convert_to_gemini_messages(self, messages: list): res = [] @@ -63,6 +64,8 @@ class GoogleGeminiBot(Bot): def _filter_messages(self, messages: list): res = [] turn = "user" + if not messages: + return res for i in range(len(messages) - 1, -1, -1): message = messages[i] if message.get("role") != turn: diff --git a/bot/linkai/link_ai_bot.py b/bot/linkai/link_ai_bot.py index f142d97..0df20bc 100644 --- a/bot/linkai/link_ai_bot.py +++ b/bot/linkai/link_ai_bot.py @@ -92,6 +92,7 @@ class LinkAIBot(Bot): "frequency_penalty": conf().get("frequency_penalty", 0.0), # [-2,2]之间,该值越大则更倾向于产生不同的内容 "presence_penalty": conf().get("presence_penalty", 0.0), # [-2,2]之间,该值越大则更倾向于产生不同的内容 "session_id": session_id, + "sender_id": session_id, "channel_type": conf().get("channel_type", "wx") } try: diff --git a/bot/xunfei/xunfei_spark_bot.py b/bot/xunfei/xunfei_spark_bot.py index b9e46ba..9ca6b96 100644 --- a/bot/xunfei/xunfei_spark_bot.py +++ b/bot/xunfei/xunfei_spark_bot.py @@ -47,7 +47,7 @@ class XunFeiBot(Bot): # 默认使用v2.0版本: "ws://spark-api.xf-yun.com/v2.1/chat" # v1.5版本为: "ws://spark-api.xf-yun.com/v1.1/chat" # v3.0版本为: "ws://spark-api.xf-yun.com/v3.1/chat" - # 升级到v3.5版本,同时升级到wss协议,避免请求时出现11200错误码 + # v3.5版本为: "wss://spark-api.xf-yun.com/v3.5/chat" self.spark_url = 
"wss://spark-api.xf-yun.com/v3.5/chat" self.host = urlparse(self.spark_url).netloc self.path = urlparse(self.spark_url).path diff --git a/bridge/reply.py b/bridge/reply.py index 0031484..f2293bd 100644 --- a/bridge/reply.py +++ b/bridge/reply.py @@ -11,7 +11,7 @@ class ReplyType(Enum): VIDEO_URL = 5 # 视频URL FILE = 6 # 文件 CARD = 7 # 微信名片,仅支持ntchat - InviteRoom = 8 # 邀请好友进群 + INVITE_ROOM = 8 # 邀请好友进群 INFO = 9 ERROR = 10 TEXT_ = 11 # 强制文本 diff --git a/channel/chat_channel.py b/channel/chat_channel.py index fe71207..907cea2 100644 --- a/channel/chat_channel.py +++ b/channel/chat_channel.py @@ -170,11 +170,13 @@ class ChatChannel(Channel): reply = self._generate_reply(context) logger.debug("[WX] ready to decorate reply: {}".format(reply)) + # reply的包装步骤 - reply = self._decorate_reply(context, reply) + if reply and reply.content: + reply = self._decorate_reply(context, reply) - # reply的发送步骤 - self._send_reply(context, reply) + # reply的发送步骤 + self._send_reply(context, reply) def _generate_reply(self, context: Context, reply: Reply = Reply()) -> Reply: e_context = PluginManager().emit_event( diff --git a/channel/wechat/wechat_channel.py b/channel/wechat/wechat_channel.py index 717b068..a65269c 100644 --- a/channel/wechat/wechat_channel.py +++ b/channel/wechat/wechat_channel.py @@ -233,7 +233,6 @@ class WechatChannel(ChatChannel): logger.info("[WX] sendImage url={}, receiver={}".format(img_url, receiver)) elif reply.type == ReplyType.IMAGE: # 从文件读取图片 image_storage = reply.content - image_storage.seek(0) itchat.send_image(image_storage, toUserName=receiver) logger.info("[WX] sendImage, receiver={}".format(receiver)) elif reply.type == ReplyType.FILE: # 新增文件回复类型 diff --git a/config.py b/config.py index 154c633..acfb6a6 100644 --- a/config.py +++ b/config.py @@ -83,7 +83,7 @@ available_setting = { "voice_reply_voice": False, # 是否使用语音回复语音,需要设置对应语音合成引擎的api key "always_reply_voice": False, # 是否一直使用语音回复 "voice_to_text": "openai", # 语音识别引擎,支持openai,baidu,google,azure - 
"text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),azure,elevenlabs + "text_to_voice": "openai", # 语音合成引擎,支持openai,baidu,google,pytts(offline),azure,elevenlabs,edge(online) "text_to_voice_model": "tts-1", "tts_voice_id": "alloy", # baidu 语音api配置, 使用百度语音识别和语音合成时需要 diff --git a/plugins/plugin_manager.py b/plugins/plugin_manager.py index 49c13ca..cecf75d 100644 --- a/plugins/plugin_manager.py +++ b/plugins/plugin_manager.py @@ -99,7 +99,7 @@ class PluginManager: try: self.current_plugin_path = plugin_path if plugin_path in self.loaded: - if self.loaded[plugin_path] == None: + if plugin_name.upper() != 'GODCMD': logger.info("reload module %s" % plugin_name) self.loaded[plugin_path] = importlib.reload(sys.modules[import_path]) dependent_module_names = [name for name in sys.modules.keys() if name.startswith(import_path + ".")] @@ -141,19 +141,21 @@ class PluginManager: failed_plugins = [] for name, plugincls in self.plugins.items(): if plugincls.enabled: - if name not in self.instances: - try: - instance = plugincls() - except Exception as e: - logger.warn("Failed to init %s, diabled. %s" % (name, e)) - self.disable_plugin(name) - failed_plugins.append(name) - continue - self.instances[name] = instance - for event in instance.handlers: - if event not in self.listening_plugins: - self.listening_plugins[event] = [] - self.listening_plugins[event].append(name) + if 'GODCMD' in self.instances and name == 'GODCMD': + continue + # if name not in self.instances: + try: + instance = plugincls() + except Exception as e: + logger.warn("Failed to init %s, diabled. 
%s" % (name, e)) + self.disable_plugin(name) + failed_plugins.append(name) + continue + self.instances[name] = instance + for event in instance.handlers: + if event not in self.listening_plugins: + self.listening_plugins[event] = [] + self.listening_plugins[event].append(name) self.refresh_order() return failed_plugins diff --git a/plugins/source.json b/plugins/source.json index d53c996..ebacc28 100644 --- a/plugins/source.json +++ b/plugins/source.json @@ -20,5 +20,9 @@ "url": "https://github.com/6vision/Apilot.git", "desc": "通过api直接查询早报、热榜、快递、天气等实用信息的插件" - } + }, + "pictureChange": { + "url": "https://github.com/Yanyutin753/pictureChange.git", + "desc": "利用stable-diffusion和百度Ai进行图生图或者画图的插件" + } } } diff --git a/plugins/tool/tool.py b/plugins/tool/tool.py index c80a945..a2ce4b6 100644 --- a/plugins/tool/tool.py +++ b/plugins/tool/tool.py @@ -137,7 +137,7 @@ class Tool(Plugin): return { # 全局配置相关 - "log": True, # tool 日志开关 + "log": False, # tool 日志开关 "debug": kwargs.get("debug", False), # 输出更多日志 "no_default": kwargs.get("no_default", False), # 不要默认的工具,只加载自己导入的工具 "think_depth": kwargs.get("think_depth", 2), # 一个问题最多使用多少次工具 diff --git a/requirements-optional.txt b/requirements-optional.txt index 74f1780..abb8a4e 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech pyttsx3>=2.90 # pytsx text to speech baidu_aip>=4.16.10 # baidu voice azure-cognitiveservices-speech # azure voice +edge-tts # edge-tts numpy<=1.24.2 langid # language detect diff --git a/voice/audio_convert.py b/voice/audio_convert.py index 18fe3c2..5c80528 100644 --- a/voice/audio_convert.py +++ b/voice/audio_convert.py @@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path): if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"): return sil_to_wav(any_path, wav_path) audio = AudioSegment.from_file(any_path) - audio.export(wav_path, format="wav") + audio = audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别 +
audio = audio.set_channels(1) + audio.export(wav_path, format="wav", codec='pcm_s16le') def any_to_sil(any_path, sil_path): diff --git a/voice/baidu/baidu_voice.py b/voice/baidu/baidu_voice.py index fbf53ce..66ba4d8 100644 --- a/voice/baidu/baidu_voice.py +++ b/voice/baidu/baidu_voice.py @@ -62,7 +62,7 @@ class BaiduVoice(Voice): # 识别本地文件 logger.debug("[Baidu] voice file name={}".format(voice_file)) pcm = get_pcm_from_wav(voice_file) - res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id}) + res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id}) if res["err_no"] == 0: logger.info("百度语音识别到了:{}".format(res["result"])) text = "".join(res["result"]) diff --git a/voice/edge/edge_voice.py b/voice/edge/edge_voice.py new file mode 100644 index 0000000..7bb8b2e --- /dev/null +++ b/voice/edge/edge_voice.py @@ -0,0 +1,50 @@ +import time + +import edge_tts +import asyncio + +from bridge.reply import Reply, ReplyType +from common.log import logger +from common.tmp_dir import TmpDir +from voice.voice import Voice + + +class EdgeVoice(Voice): + + def __init__(self): + ''' + # 普通话 + zh-CN-XiaoxiaoNeural + zh-CN-XiaoyiNeural + zh-CN-YunjianNeural + zh-CN-YunxiNeural + zh-CN-YunxiaNeural + zh-CN-YunyangNeural + # 地方口音 + zh-CN-liaoning-XiaobeiNeural + zh-CN-shaanxi-XiaoniNeural + # 粤语 + zh-HK-HiuGaaiNeural + zh-HK-HiuMaanNeural + zh-HK-WanLungNeural + # 湾湾腔 + zh-TW-HsiaoChenNeural + zh-TW-HsiaoYuNeural + zh-TW-YunJheNeural + ''' + self.voice = "zh-CN-YunjianNeural" + + def voiceToText(self, voice_file): + pass + + async def gen_voice(self, text, fileName): + communicate = edge_tts.Communicate(text, self.voice) + await communicate.save(fileName) + + def textToVoice(self, text): + fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".mp3" + + asyncio.run(self.gen_voice(text, fileName)) + + logger.info("[EdgeTTS] textToVoice text={} voice file name={}".format(text, fileName)) + return Reply(ReplyType.VOICE, fileName) diff
--git a/voice/factory.py b/voice/factory.py index ed80758..bc9c9c3 100644 --- a/voice/factory.py +++ b/voice/factory.py @@ -42,4 +42,8 @@ def create_voice(voice_type): from voice.ali.ali_voice import AliVoice return AliVoice() + elif voice_type == "edge": + from voice.edge.edge_voice import EdgeVoice + + return EdgeVoice() raise RuntimeError