Skip to content

Master 4.0 #2210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,5 @@ objdump*
TODO
experimental_mods
search_results
gg.docx
unstructured_reader.py
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ RUN echo '[global]' > /etc/pip.conf && \
echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf

# 语音输出功能(以下1,2行更换阿里源,第3,4行安装ffmpeg,都可以删除)
# 语音输出功能(以下1,2行更换阿里源,第3,4行安装ffmpeg,都可以删除)
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources && \
sed -i 's/security.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources && \
apt-get update
Expand All @@ -34,5 +34,7 @@ RUN uv venv --python=3.12 && uv pip install -r requirements.txt -i https://mirro
# # 非必要步骤,用于预热模块(可以删除)
RUN python -c 'from check_proxy import warm_up_modules; warm_up_modules()'

ENV CGO_ENABLED=0

# 启动(必要)
CMD ["bash", "-c", "python main.py"]
42 changes: 42 additions & 0 deletions check_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,48 @@ def warm_up_modules():
enc.encode("模块预热", disallowed_special=())
enc = model_info["gpt-4"]['tokenizer']
enc.encode("模块预热", disallowed_special=())
try_warm_up_vectordb()


# def try_warm_up_vectordb():
# try:
# import os
# import nltk
# target = os.path.expanduser('~/nltk_data')
# logger.info(f'模块预热: nltk punkt (从Github下载部分文件到 {target})')
# nltk.data.path.append(target)
# nltk.download('punkt', download_dir=target)
# logger.info('模块预热完成: nltk punkt')
# except:
# logger.exception('模块预热: nltk punkt 失败,可能需要手动安装 nltk punkt')
# logger.error('模块预热: nltk punkt 失败,可能需要手动安装 nltk punkt')


def try_warm_up_vectordb():
    """Best-effort warm-up of the NLTK resources used by the vector DB.

    The function first looks for the ``punkt``, ``punkt_tab`` and
    ``averaged_perceptron_tagger_eng`` resources through ``nltk.data.find``
    (with ``~/nltk_data`` added to NLTK's search path). If any lookup fails,
    all three packages are downloaded into ``~/nltk_data`` with the project's
    ``shared_utils.nltk_downloader.Downloader``.

    Lookup, import and download failures are logged instead of raised, so a
    missing optional dependency or an unreachable network does not abort the
    caller's warm-up sequence.
    """
    import os

    try:
        import nltk
    except ImportError:
        # Keep the warm-up best-effort: a missing nltk must not crash the
        # caller (the previous, commented-out version also imported inside
        # its try block).
        logger.exception('模块预热: nltk punkt 失败,可能需要手动安装 nltk punkt')
        return

    target = os.path.expanduser('~/nltk_data')
    # Avoid piling up duplicate entries when the warm-up runs more than once.
    if target not in nltk.data.path:
        nltk.data.path.append(target)

    # (lookup path for nltk.data.find, package id for the downloader)
    resources = (
        ('tokenizers/punkt', 'punkt'),
        ('tokenizers/punkt_tab', 'punkt_tab'),
        ('taggers/averaged_perceptron_tagger_eng', 'averaged_perceptron_tagger_eng'),
    )

    try:
        # Try the local cache first.
        logger.info('nltk模块预热')
        for lookup_path, _ in resources:
            nltk.data.find(lookup_path)
        logger.info('nltk模块预热完成(读取本地缓存)')
        return
    except Exception:
        # Any lookup failure falls through to the download attempt below.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of silently starting a download.
        pass

    try:
        logger.info(f'模块预热: nltk punkt (从 Github 下载部分文件到 {target})')
        from shared_utils.nltk_downloader import Downloader
        downloader = Downloader()
        for _, package_id in resources:
            downloader.download(package_id, download_dir=target)
        logger.info('nltk模块预热完成')
    except Exception:
        logger.exception('模块预热: nltk punkt 失败,可能需要手动安装 nltk punkt')


def warm_up_vectordb():
"""
Expand Down
30 changes: 17 additions & 13 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5",
"gpt-4", "gpt-4-32k", "azure-gpt-4", "glm-4", "glm-4v", "glm-3-turbo",
"gemini-1.5-pro", "chatglm3", "chatglm4",
"deepseek-chat", "deepseek-coder", "deepseek-reasoner",
"deepseek-chat", "deepseek-coder", "deepseek-reasoner",
"volcengine-deepseek-r1-250120", "volcengine-deepseek-v3-241226",
"dashscope-deepseek-r1", "dashscope-deepseek-v3",
"dashscope-qwen3-14b", "dashscope-qwen3-235b-a22b", "dashscope-qwen3-32b",
Expand Down Expand Up @@ -94,19 +94,19 @@

FONT = "Theme-Default-Font"
AVAIL_FONTS = [
"默认值(Theme-Default-Font)",
"宋体(SimSun)",
"黑体(SimHei)",
"楷体(KaiTi)",
"仿宋(FangSong)",
"默认值(Theme-Default-Font)",
"宋体(SimSun)",
"黑体(SimHei)",
"楷体(KaiTi)",
"仿宋(FangSong)",
"华文细黑(STHeiti Light)",
"华文楷体(STKaiti)",
"华文仿宋(STFangsong)",
"华文宋体(STSong)",
"华文中宋(STZhongsong)",
"华文新魏(STXinwei)",
"华文隶书(STLiti)",
# 备注:以下字体需要网络支持,您可以自定义任意您喜欢的字体,如下所示,需要满足的格式为 "字体昵称(字体英文真名@字体css下载链接)"
"华文楷体(STKaiti)",
"华文仿宋(STFangsong)",
"华文宋体(STSong)",
"华文中宋(STZhongsong)",
"华文新魏(STXinwei)",
"华文隶书(STLiti)",
# 备注:以下字体需要网络支持,您可以自定义任意您喜欢的字体,如下所示,需要满足的格式为 "字体昵称(字体英文真名@字体css下载链接)"
"思源宋体(Source Han Serif CN VF@https://chinese-fonts-cdn.deno.dev/packages/syst/dist/SourceHanSerifCN/result.css)",
"月星楷(Moon Stars Kai HW@https://chinese-fonts-cdn.deno.dev/packages/moon-stars-kai/dist/MoonStarsKaiHW-Regular/result.css)",
"珠圆体(MaokenZhuyuanTi@https://chinese-fonts-cdn.deno.dev/packages/mkzyt/dist/猫啃珠圆体/result.css)",
Expand Down Expand Up @@ -355,6 +355,10 @@
JINA_API_KEY = ""


# SEMANTIC SCHOLAR API KEY
SEMANTIC_SCHOLAR_KEY = ""


# 是否自动裁剪上下文长度(是否启动,默认不启动)
AUTO_CONTEXT_CLIP_ENABLE = False
# 目标裁剪上下文的token长度(如果超过这个长度,则会自动裁剪)
Expand Down
60 changes: 54 additions & 6 deletions crazy_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ def get_crazy_functions():
from crazy_functions.SourceCode_Comment import 注释Python项目
from crazy_functions.SourceCode_Comment_Wrap import SourceCodeComment_Wrap
from crazy_functions.VideoResource_GPT import 多媒体任务
from crazy_functions.Document_Conversation import 批量文件询问
from crazy_functions.Document_Conversation_Wrap import Document_Conversation_Wrap


function_plugins = {
"多媒体智能体": {
Expand Down Expand Up @@ -378,7 +381,16 @@ def get_crazy_functions():
"Info": "PDF翻译中文,并重新编译PDF | 输入参数为路径",
"Function": HotReload(PDF翻译中文并重新编译PDF), # 当注册Class后,Function旧接口仅会在“虚空终端”中起作用
"Class": PDF_Localize # 新一代插件需要注册Class
}
},
"批量文件询问 (支持自定义总结各种文件)": {
"Group": "学术",
"Color": "stop",
"AsButton": False,
"AdvancedArgs": False,
"Info": "先上传文件,点击此按钮,进行提问",
"Function": HotReload(批量文件询问),
"Class": Document_Conversation_Wrap,
},
}

function_plugins.update(
Expand Down Expand Up @@ -414,8 +426,6 @@ def get_crazy_functions():




# -=--=- 尚未充分测试的实验性插件 & 需要额外依赖的插件 -=--=-
try:
from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要

Expand Down Expand Up @@ -696,6 +706,44 @@ def get_crazy_functions():
logger.error(trimmed_format_exc())
logger.error("Load function plugin failed")

try:
from crazy_functions.Document_Optimize import 自定义智能文档处理
function_plugins.update(
{
"一键处理文档(支持自定义全文润色、降重等)": {
"Group": "学术",
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder": "请输入处理指令和要求(可以详细描述),如:请帮我润色文本,要求幽默点。默认调用润色指令。",
"Info": "保留文档结构,智能处理文档内容 | 输入参数为文件路径",
"Function": HotReload(自定义智能文档处理)
},
}
)
except:
logger.error(trimmed_format_exc())
logger.error("Load function plugin failed")



try:
from crazy_functions.Paper_Reading import 快速论文解读
function_plugins.update(
{
"速读论文": {
"Group": "学术",
"Color": "stop",
"AsButton": False,
"Info": "上传一篇论文进行快速分析和解读 | 输入参数为论文路径或DOI/arXiv ID",
"Function": HotReload(快速论文解读),
},
}
)
except:
logger.error(trimmed_format_exc())
logger.error("Load function plugin failed")


# try:
# from crazy_functions.高级功能函数模板 import 测试图表渲染
Expand Down Expand Up @@ -744,12 +792,12 @@ def get_multiplex_button_functions():
"查互联网后回答":
"查互联网后回答",

"多模型对话":
"多模型对话":
"询问多个GPT模型", # 映射到上面的 `询问多个GPT模型` 插件

"智能召回 RAG":
"智能召回 RAG":
"Rag智能召回", # 映射到上面的 `Rag智能召回` 插件

"多媒体查询":
"多媒体查询":
"多媒体智能体", # 映射到上面的 `多媒体智能体` 插件
}
Loading