From a2048bfc7e1663dc282f278d46cb56bdc9d30171 Mon Sep 17 00:00:00 2001
From: xuhu0115 <64852985+xuhu0115@users.noreply.github.com>
Date: Wed, 26 Jun 2024 02:24:24 +0800
Subject: [PATCH] update zhipu in langchain

---
 ...6\216\245\345\205\245 LangChain (1).ipynb" | 971 ++++++++++++++++++
 .../zhipuai_llm.py"                           |  20 +-
 2 files changed, 983 insertions(+), 8 deletions(-)
 create mode 100644 "notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/1.LLM \346\216\245\345\205\245 LangChain (1).ipynb"

diff --git "a/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/1.LLM \346\216\245\345\205\245 LangChain (1).ipynb" "b/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/1.LLM \346\216\245\345\205\245 LangChain (1).ipynb"
new file mode 100644
index 0000000..f7f118c
--- /dev/null
+++ "b/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/1.LLM \346\216\245\345\205\245 LangChain (1).ipynb"
@@ -0,0 +1,971 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "cd27580d-a1f0-4548-a77d-368312687f8e",
+   "metadata": {},
+   "source": [
+    "# 4.1 Connecting an LLM to LangChain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "249b0303-2d62-469c-a457-6cfcd42eb14e",
+   "metadata": {},
+   "source": [
+    "LangChain provides an efficient framework for building custom applications on top of LLMs, helping developers quickly harness the power of large models. It supports many models out of the box, with built-in interfaces for OpenAI, LLaMA, and others. LangChain does not bundle every model, however; instead it offers strong extensibility by letting users define their own LLM types."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cd6dae23-b591-4c26-99b8-db33a77693f9",
+   "metadata": {},
+   "source": [
+    "## 1. Calling ChatGPT via LangChain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7073e6df-8d24-46fe-86b3-ac622177500a",
+   "metadata": {},
+   "source": [
+    "LangChain wraps many different LLMs, so through its interface you can conveniently call ChatGPT and integrate it into an application built on LangChain. Here we briefly describe how to call ChatGPT through the LangChain interface.\n",
+    "\n",
+    "Note that calling ChatGPT through LangChain also requires your personal API key, configured in the same way as before."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db4bf770",
+   "metadata": {},
+   "source": [
+    "### 1.1 Models\n",
+    "Import OpenAI's chat model `ChatOpenAI` from `langchain_openai`. Besides OpenAI, `langchain.chat_models` integrates many other chat models; see the [LangChain API reference](https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.chat_models) for details."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "09245292-6ab4-4a45-b5c4-42f60acb6b39",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import openai\n",
+    "from dotenv import load_dotenv, find_dotenv\n",
+    "\n",
+    "# Load environment variables from the local/project .env file.\n",
+    "\n",
+    "# find_dotenv() locates the .env file\n",
+    "# load_dotenv() reads it and loads its variables into the current environment\n",
+    "# If you set the variables globally, this line has no effect.\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# Read the environment variable OPENAI_API_KEY\n",
+    "openai_api_key = os.environ['OPENAI_API_KEY']"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "acecc1e6",
+   "metadata": {},
+   "source": [
+    "If langchain-openai is not installed yet, first run the cell below to install it!"
+   ]
+  },
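+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a1f2e3d4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# A minimal install sketch, assuming the langchain-openai package name\n",
+    "# implied by the import in the next cell:\n",
+    "%pip install -qU langchain-openai"
+   ]
+  },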
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ee1f024b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_openai import ChatOpenAI"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "08314bf7-7ab0-4bc9-b7eb-6dbf88f04a05",
+   "metadata": {},
+   "source": [
+    "Next, instantiate a `ChatOpenAI` object. You can pass hyperparameters at instantiation time to control the answers, for example the `temperature` parameter."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9db5579b-72be-4eda-a217-a9f30ad75b74",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "ChatOpenAI(client=, async_client=, temperature=0.0, openai_api_key=SecretStr('**********'), openai_api_base='https://api.chatgptid.net/v1', openai_proxy='')"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Set temperature to 0.0 to reduce randomness in the generated answers.\n",
+    "# If you want a different, more creative answer each time, try raising it.\n",
+    "llm = ChatOpenAI(temperature=0.0)\n",
+    "llm"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "247d3aad-f100-415b-b19b-a673242d18a0",
+   "metadata": {},
+   "source": [
+    "The cell above assumes your OpenAI API key is set as an environment variable. If you prefer to specify the key manually, use the following code:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "93de6b9c-2cab-4716-81e5-b674fd714562",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = ChatOpenAI(temperature=0, openai_api_key=\"YOUR_API_KEY\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8504c768-416e-48fb-85ed-21c8f05b3e2e",
+   "metadata": {},
+   "source": [
+    "As you can see, the default is the GPT-3.5 model (gpt-3.5-turbo). Other commonly used hyperparameters include:\n",
+    "\n",
+    "- model_name: the model to use, 'gpt-3.5-turbo' by default; the values match the native OpenAI API.\n",
+    "\n",
+    "- temperature: the temperature coefficient; the values match the native API.\n",
+    "\n",
+    "- openai_api_key: your OpenAI API key, settable at instantiation if you do not use an environment variable.\n",
+    "\n",
+    "- openai_proxy: a proxy, settable at instantiation if you do not use an environment variable.\n",
+    "\n",
+    "- streaming: whether to stream the answer, i.e. emit it token by token; False by default, not covered further here.\n",
+    "\n",
+    "- max_tokens: the maximum number of tokens the model may output, with the same meaning and values as before.\n",
+    "\n",
+    "A sketch combining several of these parameters follows."
+   ]
+  },
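+  {
+   "cell_type": "markdown",
+   "id": "b2c3d4e5",
+   "metadata": {},
+   "source": [
+    "For illustration, a minimal sketch that sets several of the hyperparameters above at once. The values are arbitrary examples, and `gpt-3.5-turbo` is simply the documented default spelled out explicitly:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3d4e5f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# All values below are illustrative, not recommendations.\n",
+    "llm_demo = ChatOpenAI(\n",
+    "    model_name=\"gpt-3.5-turbo\",  # the model the default resolves to\n",
+    "    temperature=0.0,             # deterministic-leaning answers\n",
+    "    max_tokens=512,              # cap on the output length\n",
+    ")"
+   ]
+  },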
+  {
+   "cell_type": "markdown",
+   "id": "27fb1b84",
+   "metadata": {},
+   "source": [
+    "Once the `LLM` of your choice is initialized, we can try it out! Let's ask it \"请你自我介绍一下自己!\" (\"Please introduce yourself!\")."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9ef54a74",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "output = llm.invoke(\"请你自我介绍一下自己!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ddb976f4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AIMessage(content='你好,我是一个智能助手,专注于为用户提供各种服务和帮助。我可以回答问题、提供信息、解决问题,帮助用户更高效地完成工作和生活。如果您有任何疑问或需要帮助,请随时告诉我,我会尽力帮助您。感谢您的使用!', response_metadata={'token_usage': {'completion_tokens': 104, 'prompt_tokens': 20, 'total_tokens': 124}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_b28b39ffa8', 'finish_reason': 'stop', 'logprobs': None})"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0aa226b2",
+   "metadata": {},
+   "source": [
+    "### 1.2 Prompt templates"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7199b8c8-4299-462b-aee1-109f66d49ca0",
+   "metadata": {},
+   "source": [
+    "When building LLM applications, we usually do not pass the user's input to the LLM as-is. Instead, we embed it in a larger piece of text called a prompt template, which supplies additional context about the task at hand.\n",
+    "PromptTemplates solve exactly this problem: they bundle all the logic for turning user input into a fully formatted prompt. Getting started can be very simple; for example, the prompt below just wraps the user's text in a translation instruction.\n",
+    "\n",
+    "We first construct a customized template:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "9acbd1ef-17f6-4513-a51b-2f489bbe7438",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.prompts import ChatPromptTemplate\n",
+    "\n",
+    "# Here we ask the model to translate the given text into English\n",
+    "prompt = \"\"\"请你将由三个反引号分割的文本翻译成英文!\\\n",
+    "text: ```{text}```\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8d459b03",
+   "metadata": {},
+   "source": [
+    "Now let's look at the fully constructed prompt:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "1c106f09",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'请你将由三个反引号分割的文本翻译成英文!text: ```我带着比身体重的行李,游入尼罗河底,经过几道闪电 看到一堆光圈,不确定是不是这里。```\\n'"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "text = \"我带着比身体重的行李,\\\n",
+    "游入尼罗河底,\\\n",
+    "经过几道闪电 看到一堆光圈,\\\n",
+    "不确定是不是这里。\\\n",
+    "\"\n",
+    "prompt.format(text=text)"
+   ]
+  },
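+  {
+   "cell_type": "markdown",
+   "id": "d4e5f6a7",
+   "metadata": {},
+   "source": [
+    "The cell above formats a plain Python string with `str.format`. As a minimal sketch, the same template can be wrapped in LangChain's own `PromptTemplate` class (assuming `langchain_core.prompts.PromptTemplate`), which registers and validates the input variables for you:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5f6a7b8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.prompts import PromptTemplate\n",
+    "\n",
+    "# Build a template object from the same template string;\n",
+    "# {text} is detected as an input variable automatically.\n",
+    "prompt_template = PromptTemplate.from_template(prompt)\n",
+    "prompt_template.format(text=text)  # produces the same string as above"
+   ]
+  },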
+  {
+   "cell_type": "markdown",
+   "id": "d81d1867",
+   "metadata": {},
+   "source": [
+    "Chat model interfaces are based on messages rather than raw text. PromptTemplates can also be used to produce message lists; in that case the `prompt` contains not only the input content but also information about each `message` (its role, its position in the list, and so on). Typically, a `ChatPromptTemplate` is a list of `ChatMessageTemplate`s, each holding the instructions for formatting one chat message (its role and its content).\n",
+    "\n",
+    "Let's look at an example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "01a4414f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[SystemMessage(content='你是一个翻译助手,可以帮助我将 中文 翻译成 英文.'),\n",
+       " HumanMessage(content='我带着比身体重的行李,游入尼罗河底,经过几道闪电 看到一堆光圈,不确定是不是这里。')]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.prompts.chat import ChatPromptTemplate\n",
+    "\n",
+    "template = \"你是一个翻译助手,可以帮助我将 {input_language} 翻译成 {output_language}.\"\n",
+    "human_template = \"{text}\"\n",
+    "\n",
+    "chat_prompt = ChatPromptTemplate.from_messages([\n",
+    "    (\"system\", template),\n",
+    "    (\"human\", human_template),\n",
+    "])\n",
+    "\n",
+    "text = \"我带着比身体重的行李,\\\n",
+    "游入尼罗河底,\\\n",
+    "经过几道闪电 看到一堆光圈,\\\n",
+    "不确定是不是这里。\\\n",
+    "\"\n",
+    "messages = chat_prompt.format_messages(input_language=\"中文\", output_language=\"英文\", text=text)\n",
+    "messages"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7ab2c62",
+   "metadata": {},
+   "source": [
+    "Next, let's call the `llm` defined earlier with these `messages` to produce an answer:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "3129e2e5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AIMessage(content='I carried luggage heavier than my body and dived into the bottom of the Nile River. After passing through several flashes of lightning, I saw a pile of halos, not sure if this is the place.')"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output = llm.invoke(messages)\n",
+    "output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2d44004f",
+   "metadata": {},
+   "source": [
+    "### 1.3 Output parsers\n",
+    "\n",
+    "OutputParsers convert the raw output of a language model into a format that can be used downstream. The main kinds of OutputParsers include:\n",
+    "- converting LLM text into structured information (e.g. JSON)\n",
+    "- converting a ChatMessage into a plain string\n",
+    "- converting the extra information returned by a call (such as OpenAI function calls) into a string\n",
+    "\n",
+    "Finally, we pass the model output to `output_parser`, a `BaseOutputParser` that accepts **either a string or a BaseMessage as input**. `StrOutputParser` simply converts any input into a string."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "c64afdf3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'I carried luggage heavier than my body and dived into the bottom of the Nile River. After passing through several flashes of lightning, I saw a pile of halos, not sure if this is the place.'"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "\n",
+    "output_parser = StrOutputParser()\n",
+    "output_parser.invoke(output)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ea44bd56",
+   "metadata": {},
+   "source": [
+    "As the result above shows, the output parser successfully converted the `ChatMessage`-typed output into a plain string."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "de5fe94a",
+   "metadata": {},
+   "source": [
+    "### 1.4 The complete pipeline"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "72a49104",
+   "metadata": {},
+   "source": [
+    "We can now combine all of this into a single chain: it takes input variables, passes them to the prompt template to create a prompt, passes the prompt to the language model, and then passes the output through an (optional) output parser. Next we use the LCEL syntax to build such a chain quickly. Let's see it in action!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "dbc6dae0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'I carried luggage heavier than my body and dived into the bottom of the Nile River. After passing through several flashes of lightning, I saw a pile of halos, not sure if this is the place.'"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chain = chat_prompt | llm | output_parser\n",
+    "chain.invoke({\"input_language\":\"中文\", \"output_language\":\"英文\",\"text\": text})\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4532af28",
+   "metadata": {},
+   "source": [
+    "Let's test one more example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "0440e251",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'我扛着比我的身体还重的行李,潜入尼罗河的底部。穿过几道闪电后,我看到一堆光环,不确定这是否就是目的地。'"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "text = 'I carried luggage heavier than my body and dived into the bottom of the Nile River. After passing through several flashes of lightning, I saw a pile of halos, not sure if this is the place.'\n",
+    "chain.invoke({\"input_language\":\"英文\", \"output_language\":\"中文\",\"text\": text})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1aa23207",
+   "metadata": {},
+   "source": [
+    "> What is LCEL?\n",
+    "LCEL (LangChain Expression Language) is a new syntax and an important addition to the LangChain toolkit. It has many advantages that make working with LangChain chains and agents simpler:\n",
+    "\n",
+    "- LCEL provides async, batch, and streaming support, so code can be ported between servers quickly.\n",
+    "- LCEL provides fallbacks, which help deal with problems in LLM output formatting.\n",
+    "- LCEL increases the parallelism of LLM calls, improving efficiency.\n",
+    "- LCEL has built-in logging, which helps you understand what complex chains and agents are doing even as they grow.\n",
+    "\n",
+    "Usage example:\n",
+    "\n",
+    "`chain = prompt | model | output_parser`\n",
+    "\n",
+    "In the code above we use LCEL to piece the different components together into one chain: the user input is passed to the prompt template, the prompt template's output is passed to the model, and the model's output is passed to the output parser. The `|` symbol works like the Unix pipe operator: it links the components together, feeding one component's output in as the next component's input. A short sketch of the streaming and batch calls follows.\n"
+   ]
+  },
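+  {
+   "cell_type": "markdown",
+   "id": "f6a7b8c9",
+   "metadata": {},
+   "source": [
+    "As a quick illustration of the async/batch/streaming points above, a minimal sketch using the standard `stream` and `batch` methods that every LCEL chain exposes (the actual outputs will vary from run to run):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7b8c9d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Stream the translation chunk by chunk as it is generated.\n",
+    "for chunk in chain.stream({\"input_language\": \"英文\", \"output_language\": \"中文\", \"text\": text}):\n",
+    "    print(chunk, end=\"\", flush=True)\n",
+    "\n",
+    "# Process several inputs in one call; returns a list of parsed strings.\n",
+    "chain.batch([\n",
+    "    {\"input_language\": \"中文\", \"output_language\": \"英文\", \"text\": \"你好\"},\n",
+    "    {\"input_language\": \"中文\", \"output_language\": \"法文\", \"text\": \"谢谢\"},\n",
+    "])"
+   ]
+  },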
+  {
+   "cell_type": "markdown",
+   "id": "4da6fbbb-996b-4d77-b9ff-ecddbf055dcb",
+   "metadata": {},
+   "source": [
+    "## 2. Calling Baidu Wenxin via LangChain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "786e2c64-37a7-4e2d-8bfd-9fcf4bd753a9",
+   "metadata": {},
+   "source": [
+    "We can likewise call Baidu's Wenxin (ERNIE) model through the LangChain framework and plug it into our application framework."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "33ddc109",
+   "metadata": {},
+   "source": [
+    "### 2.1 Connecting a custom LLM to LangChain\n",
+    "Older versions of LangChain did not support calling Wenxin directly, so we needed to define a custom LLM that wraps the Wenxin model. To show how to define a custom LLM, we outline the method in \"附1.LangChain自定义LLM\" (Appendix 1: Custom LLMs in LangChain); see also the [source documentation](https://python.langchain.com/v0.1/docs/modules/model_io/llms/custom_llm/).\n",
+    "\n",
+    "Here we simply call the pre-built Wenxin_LLM; see `wenxin_llm.py` for how Wenxin_LLM is wrapped.\n",
+    "\n",
+    "**Note: to run the code below, you must download our wrapper [wenxin_llm.py](./wenxin_llm.py) into the same directory as this notebook. Because newer versions of LangChain can call the Wenxin Qianfan API directly, we recommend the code in the next subsection for calling the Wenxin model instead.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8505d726-4d2a-4bd4-a603-5edb1b204e05",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Requires the downloaded source file wenxin_llm.py\n",
+    "from wenxin_llm import Wenxin_LLM"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "127e3020-7fd2-4f8a-87e6-c088b1b773ab",
+   "metadata": {},
+   "source": [
+    "As with ChatGPT, we want to keep the keys in a .env file and load them into environment variables, hiding the key details and keeping them safe. So configure `QIANFAN_AK` and `QIANFAN_SK` in your .env file, then load them with the following code:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "bc0dd3cf-f0a3-46cc-8eff-465523acfbdc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# Load environment variables from the local/project .env file.\n",
+    "\n",
+    "# find_dotenv() locates the .env file\n",
+    "# load_dotenv() reads it and loads its variables into the current environment\n",
+    "# If you set the variables globally, this line has no effect.\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# Read the API keys from the environment\n",
+    "wenxin_api_key = os.environ[\"QIANFAN_AK\"]\n",
+    "wenxin_secret_key = os.environ[\"QIANFAN_SK\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "0ced8d04-8f34-42eb-8eb3-0171ac5db0ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = Wenxin_LLM(api_key=wenxin_api_key, secret_key=wenxin_secret_key, system=\"你是一个助手!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "a1e909be-889f-4ce9-9164-0410ff4ad73d",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [03-31 22:12:53] openapi_requestor.py:316 [t:27812]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "2\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'你好!我是助手,负责协助您完成各种任务。我具备快速响应、高效执行和灵活适应的能力,致力于为您提供优质的服务。无论您需要什么帮助,我都会尽力满足您的需求。'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "llm.invoke(\"你好,请你自我介绍一下!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "2167d07a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO] [03-31 22:12:41] openapi_requestor.py:316 [t:27812]: requesting llm api endpoint: /chat/eb-instant\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n",
+      "2\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "'你好!我是助手,负责协助您完成各种任务。我具备快速学习和处理信息的能力,能够根据您的需求提供帮助和回答问题。无论您需要什么帮助,我都会尽力提供支持。'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Alternatively\n",
+    "llm(prompt=\"你好,请你自我介绍一下!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b7b81514-3735-4e34-b4a8-2c43240307ca",
+   "metadata": {},
+   "source": [
+    "This lets us add the Wenxin model to the LangChain architecture and call it in our applications. A skeleton of such a wrapper is sketched below."
+   ]
+  },
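+  {
+   "cell_type": "markdown",
+   "id": "b8c9d0e1",
+   "metadata": {},
+   "source": [
+    "For reference, a minimal sketch of the shape such a custom wrapper takes, assuming only the `LLM` base class from `langchain_core`. The real implementation lives in `wenxin_llm.py`; the class name and method body here are illustrative placeholders, not the actual Wenxin API call:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9d0e1f2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import Any, List, Optional\n",
+    "\n",
+    "from langchain_core.callbacks.manager import CallbackManagerForLLMRun\n",
+    "from langchain_core.language_models.llms import LLM\n",
+    "\n",
+    "\n",
+    "class MyCustomLLM(LLM):\n",
+    "    \"\"\"Skeleton of a custom LLM: subclass LLM and implement _call.\"\"\"\n",
+    "\n",
+    "    api_key: str = \"\"\n",
+    "\n",
+    "    def _call(self, prompt: str, stop: Optional[List[str]] = None,\n",
+    "              run_manager: Optional[CallbackManagerForLLMRun] = None,\n",
+    "              **kwargs: Any) -> str:\n",
+    "        # A real wrapper would call the vendor API here and return its text.\n",
+    "        return f\"echo: {prompt}\"\n",
+    "\n",
+    "    @property\n",
+    "    def _llm_type(self) -> str:\n",
+    "        # Identifier used by LangChain for logging purposes.\n",
+    "        return \"my_custom_llm\""
+   ]
+  },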
+  {
+   "cell_type": "markdown",
+   "id": "7266bbe8",
+   "metadata": {},
+   "source": [
+    "### 2.2 Calling Wenxin directly in LangChain\n",
+    "\n",
+    "We can also use a newer version of LangChain to call the Wenxin (ERNIE Bot) model directly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7312bb4d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# Load environment variables from the local/project .env file.\n",
+    "\n",
+    "# find_dotenv() locates the .env file\n",
+    "# load_dotenv() reads it and loads its variables into the current environment\n",
+    "# If you set the variables globally, this line has no effect.\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# Read the API keys from the environment\n",
+    "QIANFAN_AK = os.environ[\"QIANFAN_AK\"]\n",
+    "QIANFAN_SK = os.environ[\"QIANFAN_SK\"]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65168734",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install required dependencies\n",
+    "%pip install -qU langchain langchain-community"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "73a934e3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "d:\\Miniconda\\miniconda3\\envs\\llm2\\lib\\site-packages\\langchain_core\\_api\\deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.7 and will be removed in 0.2.0. Use invoke instead.\n",
+      "  warn_deprecated(\n",
+      "[INFO] [03-31 22:40:14] openapi_requestor.py:316 [t:3684]: requesting llm api endpoint: /chat/eb-instant\n",
+      "[INFO] [03-31 22:40:14] oauth.py:207 [t:3684]: trying to refresh access_token for ak `MxBM7W***`\n",
+      "[INFO] [03-31 22:40:15] oauth.py:220 [t:3684]: sucessfully refresh access_token\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "你好!我是文心一言,英文名是ERNIE Bot。我是一款人工智能语言模型,可以协助你完成范围广泛的任务并提供有关各种主题的信息,比如回答问题,提供定义和解释及建议,还能提供上下文知识和对话管理。如果你有任何问题或需要帮助,随时向我提问,我会尽力回答。\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain_community.llms import QianfanLLMEndpoint\n",
+    "\n",
+    "llm = QianfanLLMEndpoint(streaming=True)\n",
+    "res = llm(\"你好,请你自我介绍一下!\")\n",
+    "print(res)"
+   ]
+  },
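+  {
+   "cell_type": "markdown",
+   "id": "d0e1f2a3",
+   "metadata": {},
+   "source": [
+    "The example above uses the completion-style `QianfanLLMEndpoint`. A minimal sketch of the chat-style counterpart, assuming `QianfanChatEndpoint` from `langchain_community.chat_models` (it reads the same QIANFAN_AK/QIANFAN_SK environment variables):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1f2a3b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.chat_models import QianfanChatEndpoint\n",
+    "\n",
+    "chat = QianfanChatEndpoint(streaming=False)\n",
+    "# Chat models take messages (or a string) and return an AIMessage.\n",
+    "print(chat.invoke(\"你好,请你自我介绍一下!\").content)"
+   ]
+  },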
+  {
+   "cell_type": "markdown",
+   "id": "e5e8955d-530f-40cb-bbae-c90b9c1a4a67",
+   "metadata": {},
+   "source": [
+    "## 3. Calling iFLYTEK Spark via LangChain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c4b8d4ac-5519-46a7-b6cf-7ebe3511e607",
+   "metadata": {},
+   "source": [
+    "We can likewise call the iFLYTEK Spark model through the LangChain framework; for more information see [SparkLLM](https://python.langchain.com/docs/integrations/llms/sparkllm)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "55e831c3-b9b8-4617-ab95-6de61c7422b9",
+   "metadata": {},
+   "source": [
+    "As with ChatGPT, we want to keep the keys in a .env file and load them into environment variables, hiding the key details and keeping them safe. So configure `IFLYTEK_SPARK_APP_ID`, `IFLYTEK_SPARK_API_KEY`, and `IFLYTEK_SPARK_API_SECRET` in your .env file, then load them with the following code:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "73d91f2b-f78a-4dc1-b1bf-09ce96a76a7e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# Load environment variables from the local/project .env file.\n",
+    "\n",
+    "# find_dotenv() locates the .env file\n",
+    "# load_dotenv() reads it and loads its variables into the current environment\n",
+    "# If you set the variables globally, this line has no effect.\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# Read the API keys from the environment\n",
+    "IFLYTEK_SPARK_APP_ID = os.environ[\"IFLYTEK_SPARK_APP_ID\"]\n",
+    "IFLYTEK_SPARK_API_KEY = os.environ[\"IFLYTEK_SPARK_API_KEY\"]\n",
+    "IFLYTEK_SPARK_API_SECRET = os.environ[\"IFLYTEK_SPARK_API_SECRET\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6a6b1ff6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def gen_spark_params(model):\n",
+    "    '''\n",
+    "    Construct the request parameters for the Spark model\n",
+    "    '''\n",
+    "\n",
+    "    spark_url_tpl = \"wss://spark-api.xf-yun.com/{}/chat\"\n",
+    "    model_params_dict = {\n",
+    "        # version v1.5\n",
+    "        \"v1.5\": {\n",
+    "            \"domain\": \"general\", # selects the model version\n",
+    "            \"spark_url\": spark_url_tpl.format(\"v1.1\") # cloud service URL\n",
+    "        },\n",
+    "        # version v2.0\n",
+    "        \"v2.0\": {\n",
+    "            \"domain\": \"generalv2\", # selects the model version\n",
+    "            \"spark_url\": spark_url_tpl.format(\"v2.1\") # cloud service URL\n",
+    "        },\n",
+    "        # version v3.0\n",
+    "        \"v3.0\": {\n",
+    "            \"domain\": \"generalv3\", # selects the model version\n",
+    "            \"spark_url\": spark_url_tpl.format(\"v3.1\") # cloud service URL\n",
+    "        },\n",
+    "        # version v3.5\n",
+    "        \"v3.5\": {\n",
+    "            \"domain\": \"generalv3.5\", # selects the model version\n",
+    "            \"spark_url\": spark_url_tpl.format(\"v3.5\") # cloud service URL\n",
+    "        }\n",
+    "    }\n",
+    "    return model_params_dict[model]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "21f0359c-325e-43fc-911a-0fc1679fc3ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.llms import SparkLLM\n",
+    "\n",
+    "spark_api_url = gen_spark_params(model=\"v1.5\")[\"spark_url\"]\n",
+    "\n",
+    "# Load the model (v3.0 by default)\n",
+    "llm = SparkLLM(spark_api_url = spark_api_url)  # pin the v1.5 endpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "3dbd0649",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "您好,我是科大讯飞研发的认知智能大模型,我的名字叫讯飞星火认知大模型。我可以和人类进行自然交流,解答问题,高效完成各领域认知智能需求。\n"
+     ]
+    }
+   ],
+   "source": [
+    "res = llm(\"你好,请你自我介绍一下!\")\n",
+    "print(res)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "042a2428-d220-4da8-8ab1-45e29617768a",
+   "metadata": {},
+   "source": [
+    "This lets us add the Spark model to the LangChain architecture and call Spark in our applications. A sketch of selecting a different model version follows."
+   ]
+  },
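+  {
+   "cell_type": "markdown",
+   "id": "f2a3b4c5",
+   "metadata": {},
+   "source": [
+    "Since `gen_spark_params` already maps version strings to domain/URL pairs, here is a small sketch of pinning a different version, e.g. v3.5. It assumes SparkLLM's `spark_llm_domain` field for passing the matching domain; any of the keys defined above would work:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3b4c5d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reuse the helper above to point SparkLLM at the v3.5 endpoint.\n",
+    "params = gen_spark_params(model=\"v3.5\")\n",
+    "llm_v35 = SparkLLM(spark_api_url=params[\"spark_url\"],\n",
+    "                   spark_llm_domain=params[\"domain\"])\n",
+    "print(llm_v35(\"你好,请你自我介绍一下!\"))"
+   ]
+  },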
+  {
+   "cell_type": "markdown",
+   "id": "8fe38818-65c9-42f8-b3e3-f997e1e3cffa",
+   "metadata": {},
+   "source": [
+    "## 4. Calling Zhipu GLM via LangChain"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0b26b48b-ddd7-4cba-86c7-07f2046e3270",
+   "metadata": {},
+   "source": [
+    "We can likewise call the Zhipu AI model through the LangChain framework and plug it into our application framework. Because the [ChatGLM](https://python.langchain.com/docs/integrations/llms/chatglm) integration shipped with LangChain is no longer usable, we need to define a custom LLM.\n",
+    "\n",
+    "If you use the Zhipu GLM API, you must download our wrapper [zhipuai_llm.py](./zhipuai_llm.py) into the same directory as this notebook before running the code below to use GLM from LangChain.\n",
+    "\n",
+    "Zhipu has officially announced that the following models will be deprecated; once deprecated, requests to them will be routed automatically to the newer models. Update your model code before the deprecation date to ensure a smooth transition; for more model information see [model](https://open.bigmodel.cn/dev/howuse/model).\n",
+    "\n",
+    "| Model code | Deprecation date | Routed to |\n",
+    "| ---- | ---- | ---- |\n",
+    "|chatglm_pro|December 31, 2024|glm-4|\n",
+    "|chatglm_std|December 31, 2024|glm-3-turbo|\n",
+    "|chatglm_lite|December 31, 2024|glm-3-turbo|"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3fe7cf9",
+   "metadata": {},
+   "source": [
+    "### 4.1 Connecting a custom ChatGLM to LangChain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "96644f3b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from zhipuai_llm import ZhipuAILLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2ec52f07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dotenv import find_dotenv, load_dotenv\n",
+    "import os\n",
+    "\n",
+    "# Load environment variables from the local/project .env file.\n",
+    "\n",
+    "# find_dotenv() locates the .env file\n",
+    "# load_dotenv() reads it and loads its variables into the current environment\n",
+    "# If you set the variables globally, this line has no effect.\n",
+    "_ = load_dotenv(find_dotenv())\n",
+    "\n",
+    "# Read the environment variable ZHIPUAI_API_KEY\n",
+    "api_key = os.environ[\"ZHIPUAI_API_KEY\"]  # the API key obtained from the console"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ef49232d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "zhipuai_model = ZhipuAILLM(model = \"glm-4\", temperature = 0.1, api_key = api_key)  # or model=\"glm-4-0520\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "73328a40",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'你好!我是智谱清言,是清华大学 KEG 实验室和智谱 AI 公司于 2023 年共同训练的语言模型。我的目标是通过回答用户提出的问题来帮助他们解决问题。由于我是一个计算机程序,所以我没有自我意识,也不能像人类一样感知世界。我只能通过分析我所学到的信息来回答问题。'"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "zhipuai_model(\"你好,请你自我介绍一下!\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c77fd1fe",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llm-universe",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git "a/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/zhipuai_llm.py" "b/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/zhipuai_llm.py"
index 5d1fe56..4710136 100644
--- "a/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/zhipuai_llm.py"
+++ "b/notebook/C4 \346\236\204\345\273\272 RAG \345\272\224\347\224\250/zhipuai_llm.py"
@@ -1,11 +1,16 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
 from typing import Any, List, Mapping, Optional, Dict
 from langchain_core.callbacks.manager import CallbackManagerForLLMRun
 from langchain_core.language_models.llms import LLM
 from zhipuai import ZhipuAI

-# 继承自 langchain_core.language_models.llms.LLM
+import os
+
+# 自定义 LLM:继承自 langchain_core.language_models.llms.LLM
 class ZhipuAILLM(LLM):
-    # 默认选用 glm-4 模型
+    # 默认选用 glm-4
     model: str = "glm-4"
     # 温度系数
     temperature: float = 0.1
@@ -15,7 +20,10 @@ class ZhipuAILLM(LLM):
 
     def _call(self, prompt : str, stop: Optional[List[str]] = None,
                 run_manager: Optional[CallbackManagerForLLMRun] = None,
                 **kwargs: Any):
-
+        client = ZhipuAI(
+            api_key = self.api_key
+        )
+
         def gen_glm_params(prompt):
             '''
             构造 GLM 模型请求参数 messages
@@ -26,10 +34,6 @@ def gen_glm_params(prompt):
             messages = [{"role": "user", "content": prompt}]
             return messages
 
-        client = ZhipuAI(
-            api_key=self.api_key
-        )
-
         messages = gen_glm_params(prompt)
         response = client.chat.completions.create(
             model = self.model,
@@ -45,7 +49,7 @@ def gen_glm_params(prompt):
     # 首先定义一个返回默认参数的方法
     @property
     def _default_params(self) -> Dict[str, Any]:
-        """获取调用Ennie API的默认参数。"""
+        """获取调用API的默认参数。"""
         normal_params = {
             "temperature": self.temperature,
         }