1 anno fa · c3d1711edc
--- a/README.md
+++ b/README.md
@@ -23,7 +23,6 @@ Demo made by [Visionn](https://www.wangpc.cc/)
 SaaS服务、私有化部署、稳定托管接入 等多种模式。
 >
 > 目前已在私域运营、智能客服、企业效率助手等场景积累了丰富的 AI 解决方案， 在电商、文教、健康、新消费等各行业沉淀了 AI 落地的最佳实践，致力于打造助力中小企业拥抱 AI 的一站式平台。

 企业服务和商用咨询可联系产品顾问：

 <img width="240" src="https://img-1317903499.cos.ap-guangzhou.myqcloud.com/docs/product-manager-qrcode.jpg">
--- a/bot/gemini/google_gemini_bot.py
+++ b/bot/gemini/google_gemini_bot.py
@@ -44,6 +44,7 @@ class GoogleGeminiBot(Bot):
        except Exception as e:
            logger.error("[Gemini] fetch reply error, may contain unsafe content")
            logger.error(e)
            return Reply(ReplyType.ERROR, "invoke [Gemini] api failed!")

    def _convert_to_gemini_messages(self, messages: list):
        res = []
@@ -63,6 +64,8 @@ class GoogleGeminiBot(Bot):
    def _filter_messages(self, messages: list):
        res = []
        turn = "user"
        if not messages:
            return res
        for i in range(len(messages) - 1, -1, -1):
            message = messages[i]
            if message.get("role") != turn:
--- a/bot/linkai/link_ai_bot.py
+++ b/bot/linkai/link_ai_bot.py
@@ -92,6 +92,7 @@ class LinkAIBot(Bot):
                "frequency_penalty": conf().get("frequency_penalty", 0.0),  # [-2,2]之间，该值越大则更倾向于产生不同的内容
                "presence_penalty": conf().get("presence_penalty", 0.0),  # [-2,2]之间，该值越大则更倾向于产生不同的内容
                "session_id": session_id,
                "sender_id": session_id,
                "channel_type": conf().get("channel_type", "wx")
            }
            try:
--- a/bot/xunfei/xunfei_spark_bot.py
+++ b/bot/xunfei/xunfei_spark_bot.py
@@ -47,7 +47,7 @@ class XunFeiBot(Bot):
        # 默认使用v2.0版本: "ws://spark-api.xf-yun.com/v2.1/chat"
        # v1.5版本为: "ws://spark-api.xf-yun.com/v1.1/chat"
        # v3.0版本为: "ws://spark-api.xf-yun.com/v3.1/chat"
        # 升级到v3.5版本，同时升级到wss协议，避免请求时出现11200错误码
        # v3.5版本为: "wss://spark-api.xf-yun.com/v3.5/chat"
        self.spark_url = "wss://spark-api.xf-yun.com/v3.5/chat"
        self.host = urlparse(self.spark_url).netloc
        self.path = urlparse(self.spark_url).path
--- a/bridge/reply.py
+++ b/bridge/reply.py
@@ -11,7 +11,7 @@ class ReplyType(Enum):
    VIDEO_URL = 5  # 视频URL
    FILE = 6  # 文件
    CARD = 7  # 微信名片，仅支持ntchat
    InviteRoom = 8  # 邀请好友进群
    INVITE_ROOM = 8  # 邀请好友进群
    INFO = 9
    ERROR = 10
    TEXT_ = 11  # 强制文本
--- a/channel/chat_channel.py
+++ b/channel/chat_channel.py
@@ -170,11 +170,13 @@ class ChatChannel(Channel):
        reply = self._generate_reply(context)

        logger.debug("[WX] ready to decorate reply: {}".format(reply))

        # reply的包装步骤
        reply = self._decorate_reply(context, reply)
        if reply and reply.content:
            reply = self._decorate_reply(context, reply)

        # reply的发送步骤
        self._send_reply(context, reply)
            # reply的发送步骤
            self._send_reply(context, reply)

    def _generate_reply(self, context: Context, reply: Reply = Reply()) -> Reply:
        e_context = PluginManager().emit_event(
--- a/channel/wechat/wechat_channel.py
+++ b/channel/wechat/wechat_channel.py
@@ -233,7 +233,6 @@ class WechatChannel(ChatChannel):
            logger.info("[WX] sendImage url={}, receiver={}".format(img_url, receiver))
        elif reply.type == ReplyType.IMAGE:  # 从文件读取图片
            image_storage = reply.content
            image_storage.seek(0)
            itchat.send_image(image_storage, toUserName=receiver)
            logger.info("[WX] sendImage, receiver={}".format(receiver))
        elif reply.type == ReplyType.FILE:  # 新增文件回复类型
--- a/config.py
+++ b/config.py
@@ -83,7 +83,7 @@ available_setting = {
    "voice_reply_voice": False,  # 是否使用语音回复语音，需要设置对应语音合成引擎的api key
    "always_reply_voice": False,  # 是否一直使用语音回复
    "voice_to_text": "openai",  # 语音识别引擎，支持openai,baidu,google,azure
    "text_to_voice": "openai",  # 语音合成引擎，支持openai,baidu,google,pytts(offline),azure,elevenlabs
    "text_to_voice": "openai",  # 语音合成引擎，支持openai,baidu,google,pytts(offline),azure,elevenlabs,edge(online)
    "text_to_voice_model": "tts-1",
    "tts_voice_id": "alloy",
    # baidu 语音api配置， 使用百度语音识别和语音合成时需要
--- a/plugins/plugin_manager.py
+++ b/plugins/plugin_manager.py
@@ -99,7 +99,7 @@ class PluginManager:
                    try:
                        self.current_plugin_path = plugin_path
                        if plugin_path in self.loaded:
                            if self.loaded[plugin_path] == None:
                            if plugin_name.upper() != 'GODCMD':
                                logger.info("reload module %s" % plugin_name)
                                self.loaded[plugin_path] = importlib.reload(sys.modules[import_path])
                                dependent_module_names = [name for name in sys.modules.keys() if name.startswith(import_path + ".")]
@@ -141,19 +141,21 @@ class PluginManager:
        failed_plugins = []
        for name, plugincls in self.plugins.items():
            if plugincls.enabled:
                if name not in self.instances:
                    try:
                        instance = plugincls()
                    except Exception as e:
                        logger.warn("Failed to init %s, diabled. %s" % (name, e))
                        self.disable_plugin(name)
                        failed_plugins.append(name)
                        continue
                    self.instances[name] = instance
                    for event in instance.handlers:
                        if event not in self.listening_plugins:
                            self.listening_plugins[event] = []
                        self.listening_plugins[event].append(name)
                if 'GODCMD' in self.instances and name == 'GODCMD':
                    continue
                # if name not in self.instances:
                try:
                    instance = plugincls()
                except Exception as e:
                    logger.warn("Failed to init %s, diabled. %s" % (name, e))
                    self.disable_plugin(name)
                    failed_plugins.append(name)
                    continue
                self.instances[name] = instance
                for event in instance.handlers:
                    if event not in self.listening_plugins:
                        self.listening_plugins[event] = []
                    self.listening_plugins[event].append(name)
        self.refresh_order()
        return failed_plugins

--- a/plugins/source.json
+++ b/plugins/source.json
@@ -20,5 +20,9 @@
      "url": "https://github.com/6vision/Apilot.git",
      "desc": "通过api直接查询早报、热榜、快递、天气等实用信息的插件"
    },
    "pictureChange": {
      "url": "https://github.com/Yanyutin753/pictureChange.git",
      "desc": "利用stable-diffusion和百度Ai进行图生图或者画图的插件"
    }
  }
 }
--- a/plugins/tool/tool.py
+++ b/plugins/tool/tool.py
@@ -137,7 +137,7 @@ class Tool(Plugin):

        return {
            # 全局配置相关
            "log": True,  # tool 日志开关
            "log": False,  # tool 日志开关
            "debug": kwargs.get("debug", False),  # 输出更多日志
            "no_default": kwargs.get("no_default", False),  # 不要默认的工具，只加载自己导入的工具
            "think_depth": kwargs.get("think_depth", 2),  # 一个问题最多使用多少次工具
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -7,6 +7,7 @@ gTTS>=2.3.1 # google text to speech
 pyttsx3>=2.90 # pytsx text to speech
 baidu_aip>=4.16.10 # baidu voice
 azure-cognitiveservices-speech # azure voice
 edge-tts # edge-tts
 numpy<=1.24.2
 langid # language detect

--- a/voice/audio_convert.py
+++ b/voice/audio_convert.py
@@ -64,7 +64,9 @@ def any_to_wav(any_path, wav_path):
    if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
        return sil_to_wav(any_path, wav_path)
    audio = AudioSegment.from_file(any_path)
    audio.export(wav_path, format="wav")
    audio.set_frame_rate(8000)    # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
    audio.set_channels(1)
    audio.export(wav_path, format="wav", codec='pcm_s16le')


 def any_to_sil(any_path, sil_path):
--- a/voice/baidu/baidu_voice.py
+++ b/voice/baidu/baidu_voice.py
@@ -62,7 +62,7 @@ class BaiduVoice(Voice):
        # 识别本地文件
        logger.debug("[Baidu] voice file name={}".format(voice_file))
        pcm = get_pcm_from_wav(voice_file)
        res = self.client.asr(pcm, "pcm", 16000, {"dev_pid": self.dev_id})
        res = self.client.asr(pcm, "pcm", 8000, {"dev_pid": self.dev_id})
        if res["err_no"] == 0:
            logger.info("百度语音识别到了：{}".format(res["result"]))
            text = "".join(res["result"])
--- a/voice/edge/edge_voice.py
+++ b/voice/edge/edge_voice.py
@@ -0,0 +1,50 @@
 import time

 import edge_tts
 import asyncio

 from bridge.reply import Reply, ReplyType
 from common.log import logger
 from common.tmp_dir import TmpDir
 from voice.voice import Voice


 class EdgeVoice(Voice):
    """Voice backend that produces speech through the ``edge_tts`` package."""

    def __init__(self):
        """Select the voice name used for synthesis.

        Voice names listed by the original author:

        Mandarin:
            zh-CN-XiaoxiaoNeural
            zh-CN-XiaoyiNeural
            zh-CN-YunjianNeural
            zh-CN-YunxiNeural
            zh-CN-YunxiaNeural
            zh-CN-YunyangNeural
        Regional accents:
            zh-CN-liaoning-XiaobeiNeural
            zh-CN-shaanxi-XiaoniNeural
        Cantonese:
            zh-HK-HiuGaaiNeural
            zh-HK-HiuMaanNeural
            zh-HK-WanLungNeural
        Taiwanese accent:
            zh-TW-HsiaoChenNeural
            zh-TW-HsiaoYuNeural
            zh-TW-YunJheNeural
        """
        self.voice = "zh-CN-YunjianNeural"

    def voiceToText(self, voice_file):
        """Speech recognition is not implemented here; always returns None."""
        return None

    async def gen_voice(self, text, fileName):
        """Build an ``edge_tts.Communicate`` for ``text`` with ``self.voice``
        and await its ``save`` to ``fileName``."""
        await edge_tts.Communicate(text, self.voice).save(fileName)

    def textToVoice(self, text):
        """Synthesize ``text`` into an mp3 in the temp dir and wrap it in a Reply.

        The file name combines the current Unix time (whole seconds) with the
        non-negative low 31 bits of ``hash(text)``.

        Returns:
            Reply of type ``ReplyType.VOICE`` whose content is the mp3 path.
        """
        tmp_prefix = TmpDir().path()
        stamp = str(int(time.time()))
        digest = str(hash(text) & 0x7FFFFFFF)
        mp3_path = tmp_prefix + "reply-" + stamp + "-" + digest + ".mp3"

        # gen_voice is a coroutine; drive it to completion synchronously.
        asyncio.run(self.gen_voice(text, mp3_path))

        logger.info("[EdgeTTS] textToVoice text={} voice file name={}".format(text, mp3_path))
        return Reply(ReplyType.VOICE, mp3_path)
--- a/voice/factory.py
+++ b/voice/factory.py
@@ -42,4 +42,8 @@ def create_voice(voice_type):
        from voice.ali.ali_voice import AliVoice

        return AliVoice()
    elif voice_type == "edge":
        from voice.edge.edge_voice import EdgeVoice

        return EdgeVoice()
    raise RuntimeError